comparison src/share/vm/opto/matcher.cpp @ 6179:8c92982cbbc4

7119644: Increase superword's vector size up to 256 bits. Summary: Increase vector size up to 256 bits for YMM AVX registers on x86. Reviewed-by: never, twisti, roland
author kvn
date Fri, 15 Jun 2012 01:25:19 -0700
parents 65149e74c706
children 1d7922586cf6
comparison
equal deleted inserted replaced
6146:eba1d5bce9e8 6179:8c92982cbbc4
1 /* 1 /*
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 * 4 *
5 * This code is free software; you can redistribute it and/or modify it 5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as 6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
33 #include "opto/opcodes.hpp" 33 #include "opto/opcodes.hpp"
34 #include "opto/regmask.hpp" 34 #include "opto/regmask.hpp"
35 #include "opto/rootnode.hpp" 35 #include "opto/rootnode.hpp"
36 #include "opto/runtime.hpp" 36 #include "opto/runtime.hpp"
37 #include "opto/type.hpp" 37 #include "opto/type.hpp"
38 #include "opto/vectornode.hpp"
38 #include "runtime/atomic.hpp" 39 #include "runtime/atomic.hpp"
39 #include "runtime/os.hpp" 40 #include "runtime/os.hpp"
40 #ifdef TARGET_ARCH_MODEL_x86_32 41 #ifdef TARGET_ARCH_MODEL_x86_32
41 # include "adfiles/ad_x86_32.hpp" 42 # include "adfiles/ad_x86_32.hpp"
42 #endif 43 #endif
55 #ifdef TARGET_ARCH_MODEL_ppc 56 #ifdef TARGET_ARCH_MODEL_ppc
56 # include "adfiles/ad_ppc.hpp" 57 # include "adfiles/ad_ppc.hpp"
57 #endif 58 #endif
58 59
59 OptoReg::Name OptoReg::c_frame_pointer; 60 OptoReg::Name OptoReg::c_frame_pointer;
60
61
62
63 const int Matcher::base2reg[Type::lastype] = {
64 Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
65 Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
66 Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
67 0, 0/*abio*/,
68 Op_RegP /* Return address */, 0, /* the memories */
69 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
70 0 /*bottom*/
71 };
72 61
73 const RegMask *Matcher::idealreg2regmask[_last_machine_leaf]; 62 const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
74 RegMask Matcher::mreg2regmask[_last_Mach_Reg]; 63 RegMask Matcher::mreg2regmask[_last_Mach_Reg];
75 RegMask Matcher::STACK_ONLY_mask; 64 RegMask Matcher::STACK_ONLY_mask;
76 RegMask Matcher::c_frame_ptr_mask; 65 RegMask Matcher::c_frame_ptr_mask;
105 idealreg2spillmask [Op_RegN] = NULL; 94 idealreg2spillmask [Op_RegN] = NULL;
106 idealreg2spillmask [Op_RegL] = NULL; 95 idealreg2spillmask [Op_RegL] = NULL;
107 idealreg2spillmask [Op_RegF] = NULL; 96 idealreg2spillmask [Op_RegF] = NULL;
108 idealreg2spillmask [Op_RegD] = NULL; 97 idealreg2spillmask [Op_RegD] = NULL;
109 idealreg2spillmask [Op_RegP] = NULL; 98 idealreg2spillmask [Op_RegP] = NULL;
99 idealreg2spillmask [Op_VecS] = NULL;
100 idealreg2spillmask [Op_VecD] = NULL;
101 idealreg2spillmask [Op_VecX] = NULL;
102 idealreg2spillmask [Op_VecY] = NULL;
110 103
111 idealreg2debugmask [Op_RegI] = NULL; 104 idealreg2debugmask [Op_RegI] = NULL;
112 idealreg2debugmask [Op_RegN] = NULL; 105 idealreg2debugmask [Op_RegN] = NULL;
113 idealreg2debugmask [Op_RegL] = NULL; 106 idealreg2debugmask [Op_RegL] = NULL;
114 idealreg2debugmask [Op_RegF] = NULL; 107 idealreg2debugmask [Op_RegF] = NULL;
115 idealreg2debugmask [Op_RegD] = NULL; 108 idealreg2debugmask [Op_RegD] = NULL;
116 idealreg2debugmask [Op_RegP] = NULL; 109 idealreg2debugmask [Op_RegP] = NULL;
110 idealreg2debugmask [Op_VecS] = NULL;
111 idealreg2debugmask [Op_VecD] = NULL;
112 idealreg2debugmask [Op_VecX] = NULL;
113 idealreg2debugmask [Op_VecY] = NULL;
117 114
118 idealreg2mhdebugmask[Op_RegI] = NULL; 115 idealreg2mhdebugmask[Op_RegI] = NULL;
119 idealreg2mhdebugmask[Op_RegN] = NULL; 116 idealreg2mhdebugmask[Op_RegN] = NULL;
120 idealreg2mhdebugmask[Op_RegL] = NULL; 117 idealreg2mhdebugmask[Op_RegL] = NULL;
121 idealreg2mhdebugmask[Op_RegF] = NULL; 118 idealreg2mhdebugmask[Op_RegF] = NULL;
122 idealreg2mhdebugmask[Op_RegD] = NULL; 119 idealreg2mhdebugmask[Op_RegD] = NULL;
123 idealreg2mhdebugmask[Op_RegP] = NULL; 120 idealreg2mhdebugmask[Op_RegP] = NULL;
121 idealreg2mhdebugmask[Op_VecS] = NULL;
122 idealreg2mhdebugmask[Op_VecD] = NULL;
123 idealreg2mhdebugmask[Op_VecX] = NULL;
124 idealreg2mhdebugmask[Op_VecY] = NULL;
124 125
125 debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node 126 debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node
126 } 127 }
127 128
128 //------------------------------warp_incoming_stk_arg------------------------ 129 //------------------------------warp_incoming_stk_arg------------------------
132 if( reg->is_stack() ) { // Stack slot argument? 133 if( reg->is_stack() ) { // Stack slot argument?
133 warped = OptoReg::add(_old_SP, reg->reg2stack() ); 134 warped = OptoReg::add(_old_SP, reg->reg2stack() );
134 warped = OptoReg::add(warped, C->out_preserve_stack_slots()); 135 warped = OptoReg::add(warped, C->out_preserve_stack_slots());
135 if( warped >= _in_arg_limit ) 136 if( warped >= _in_arg_limit )
136 _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen 137 _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
137 if (!RegMask::can_represent(warped)) { 138 if (!RegMask::can_represent_arg(warped)) {
138 // the compiler cannot represent this method's calling sequence 139 // the compiler cannot represent this method's calling sequence
139 C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence"); 140 C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
140 return OptoReg::Bad; 141 return OptoReg::Bad;
141 } 142 }
142 return warped; 143 return warped;
300 // Compute highest outgoing stack argument as 301 // Compute highest outgoing stack argument as
301 // _new_SP + out_preserve_stack_slots + max(outgoing argument size). 302 // _new_SP + out_preserve_stack_slots + max(outgoing argument size).
302 _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots()); 303 _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
303 assert( is_even(_out_arg_limit), "out_preserve must be even" ); 304 assert( is_even(_out_arg_limit), "out_preserve must be even" );
304 305
305 if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) { 306 if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) {
306 // the compiler cannot represent this method's calling sequence 307 // the compiler cannot represent this method's calling sequence
307 C->record_method_not_compilable("must be able to represent all call arguments in reg mask"); 308 C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
308 } 309 }
309 310
310 if (C->failing()) return; // bailed out on incoming arg failure 311 if (C->failing()) return; // bailed out on incoming arg failure
426 // Disallow any debug info in outgoing argument areas by setting the 427 // Disallow any debug info in outgoing argument areas by setting the
427 // initial mask accordingly. 428 // initial mask accordingly.
428 void Matcher::init_first_stack_mask() { 429 void Matcher::init_first_stack_mask() {
429 430
430 // Allocate storage for spill masks as masks for the appropriate load type. 431 // Allocate storage for spill masks as masks for the appropriate load type.
431 RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * 3*6); 432 RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4));
432 433
433 idealreg2spillmask [Op_RegN] = &rms[0]; 434 idealreg2spillmask [Op_RegN] = &rms[0];
434 idealreg2spillmask [Op_RegI] = &rms[1]; 435 idealreg2spillmask [Op_RegI] = &rms[1];
435 idealreg2spillmask [Op_RegL] = &rms[2]; 436 idealreg2spillmask [Op_RegL] = &rms[2];
436 idealreg2spillmask [Op_RegF] = &rms[3]; 437 idealreg2spillmask [Op_RegF] = &rms[3];
449 idealreg2mhdebugmask[Op_RegL] = &rms[14]; 450 idealreg2mhdebugmask[Op_RegL] = &rms[14];
450 idealreg2mhdebugmask[Op_RegF] = &rms[15]; 451 idealreg2mhdebugmask[Op_RegF] = &rms[15];
451 idealreg2mhdebugmask[Op_RegD] = &rms[16]; 452 idealreg2mhdebugmask[Op_RegD] = &rms[16];
452 idealreg2mhdebugmask[Op_RegP] = &rms[17]; 453 idealreg2mhdebugmask[Op_RegP] = &rms[17];
453 454
455 idealreg2spillmask [Op_VecS] = &rms[18];
456 idealreg2spillmask [Op_VecD] = &rms[19];
457 idealreg2spillmask [Op_VecX] = &rms[20];
458 idealreg2spillmask [Op_VecY] = &rms[21];
459
454 OptoReg::Name i; 460 OptoReg::Name i;
455 461
456 // At first, start with the empty mask 462 // At first, start with the empty mask
457 C->FIRST_STACK_mask().Clear(); 463 C->FIRST_STACK_mask().Clear();
458 464
460 OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots()); 466 OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
461 for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1)) 467 for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1))
462 C->FIRST_STACK_mask().Insert(i); 468 C->FIRST_STACK_mask().Insert(i);
463 469
464 // Add in all bits past the outgoing argument area 470 // Add in all bits past the outgoing argument area
465 guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)), 471 guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)),
466 "must be able to represent all call arguments in reg mask"); 472 "must be able to represent all call arguments in reg mask");
467 init = _out_arg_limit; 473 init = _out_arg_limit;
468 for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1)) 474 for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
469 C->FIRST_STACK_mask().Insert(i); 475 C->FIRST_STACK_mask().Insert(i);
470 476
471 // Finally, set the "infinite stack" bit. 477 // Finally, set the "infinite stack" bit.
472 C->FIRST_STACK_mask().set_AllStack(); 478 C->FIRST_STACK_mask().set_AllStack();
473 479
474 // Make spill masks. Registers for their class, plus FIRST_STACK_mask. 480 // Make spill masks. Registers for their class, plus FIRST_STACK_mask.
481 RegMask aligned_stack_mask = C->FIRST_STACK_mask();
482 // Keep spill masks aligned.
483 aligned_stack_mask.clear_to_pairs();
484 assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
485
486 *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
475 #ifdef _LP64 487 #ifdef _LP64
476 *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN]; 488 *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
477 idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask()); 489 idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
490 idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask);
491 #else
492 idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
478 #endif 493 #endif
479 *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI]; 494 *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
480 idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask()); 495 idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
481 *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL]; 496 *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
482 idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask()); 497 idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask);
483 *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF]; 498 *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
484 idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask()); 499 idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
485 *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD]; 500 *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
486 idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask()); 501 idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask);
487 *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; 502
488 idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask()); 503 if (Matcher::vector_size_supported(T_BYTE,4)) {
489 504 *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
505 idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
506 }
507 if (Matcher::vector_size_supported(T_FLOAT,2)) {
508 *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD];
509 idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask);
510 }
511 if (Matcher::vector_size_supported(T_FLOAT,4)) {
512 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX);
513 assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
514 *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX];
515 idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask);
516 }
517 if (Matcher::vector_size_supported(T_FLOAT,8)) {
518 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY);
519 assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
520 *idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY];
521 idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask);
522 }
490 if (UseFPUForSpilling) { 523 if (UseFPUForSpilling) {
491 // This mask logic assumes that the spill operations are 524 // This mask logic assumes that the spill operations are
492 // symmetric and that the registers involved are the same size. 525 // symmetric and that the registers involved are the same size.
493 // On sparc for instance we may have to use 64 bit moves which will 526 // On sparc for instance we may have to use 64 bit moves which will
494 // kill 2 registers when used with F0-F31. 527 // kill 2 registers when used with F0-F31.
805 idealreg2regmask[Op_RegI] = &spillI->out_RegMask(); 838 idealreg2regmask[Op_RegI] = &spillI->out_RegMask();
806 idealreg2regmask[Op_RegL] = &spillL->out_RegMask(); 839 idealreg2regmask[Op_RegL] = &spillL->out_RegMask();
807 idealreg2regmask[Op_RegF] = &spillF->out_RegMask(); 840 idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
808 idealreg2regmask[Op_RegD] = &spillD->out_RegMask(); 841 idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
809 idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); 842 idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
843
844 // Vector regmasks.
845 if (Matcher::vector_size_supported(T_BYTE,4)) {
846 TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
847 MachNode *spillVectS = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
848 idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask();
849 }
850 if (Matcher::vector_size_supported(T_FLOAT,2)) {
851 MachNode *spillVectD = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD));
852 idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask();
853 }
854 if (Matcher::vector_size_supported(T_FLOAT,4)) {
855 MachNode *spillVectX = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX));
856 idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask();
857 }
858 if (Matcher::vector_size_supported(T_FLOAT,8)) {
859 MachNode *spillVectY = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY));
860 idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask();
861 }
810 } 862 }
811 863
812 #ifdef ASSERT 864 #ifdef ASSERT
813 static void match_alias_type(Compile* C, Node* n, Node* m) { 865 static void match_alias_type(Compile* C, Node* n, Node* m) {
814 if (!VerifyAliases) return; // do not go looking for trouble by default 866 if (!VerifyAliases) return; // do not go looking for trouble by default
1061 // Keep track of the largest numbered stack slot used for an arg. 1113 // Keep track of the largest numbered stack slot used for an arg.
1062 // Largest used slot per call-site indicates the amount of stack 1114 // Largest used slot per call-site indicates the amount of stack
1063 // that is killed by the call. 1115 // that is killed by the call.
1064 if( warped >= out_arg_limit_per_call ) 1116 if( warped >= out_arg_limit_per_call )
1065 out_arg_limit_per_call = OptoReg::add(warped,1); 1117 out_arg_limit_per_call = OptoReg::add(warped,1);
1066 if (!RegMask::can_represent(warped)) { 1118 if (!RegMask::can_represent_arg(warped)) {
1067 C->record_method_not_compilable_all_tiers("unsupported calling sequence"); 1119 C->record_method_not_compilable_all_tiers("unsupported calling sequence");
1068 return OptoReg::Bad; 1120 return OptoReg::Bad;
1069 } 1121 }
1070 return warped; 1122 return warped;
1071 } 1123 }
1249 // Since the max-per-method covers the max-per-call-site and debug info 1301 // Since the max-per-method covers the max-per-call-site and debug info
1250 // is excluded on the max-per-method basis, debug info cannot land in 1302 // is excluded on the max-per-method basis, debug info cannot land in
1251 // this killed area. 1303 // this killed area.
1252 uint r_cnt = mcall->tf()->range()->cnt(); 1304 uint r_cnt = mcall->tf()->range()->cnt();
1253 MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj ); 1305 MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
1254 if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) { 1306 if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) {
1255 C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence"); 1307 C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
1256 } else { 1308 } else {
1257 for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++) 1309 for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
1258 proj->_rout.Insert(OptoReg::Name(i)); 1310 proj->_rout.Insert(OptoReg::Name(i));
1259 } 1311 }