Mercurial > hg > truffle
comparison src/share/vm/opto/matcher.cpp @ 6179:8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256-bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
author | kvn |
---|---|
date | Fri, 15 Jun 2012 01:25:19 -0700 |
parents | 65149e74c706 |
children | 1d7922586cf6 |
comparison
equal
deleted
inserted
replaced
6146:eba1d5bce9e8 | 6179:8c92982cbbc4 |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. | 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | 4 * |
5 * This code is free software; you can redistribute it and/or modify it | 5 * This code is free software; you can redistribute it and/or modify it |
6 * under the terms of the GNU General Public License version 2 only, as | 6 * under the terms of the GNU General Public License version 2 only, as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
33 #include "opto/opcodes.hpp" | 33 #include "opto/opcodes.hpp" |
34 #include "opto/regmask.hpp" | 34 #include "opto/regmask.hpp" |
35 #include "opto/rootnode.hpp" | 35 #include "opto/rootnode.hpp" |
36 #include "opto/runtime.hpp" | 36 #include "opto/runtime.hpp" |
37 #include "opto/type.hpp" | 37 #include "opto/type.hpp" |
38 #include "opto/vectornode.hpp" | |
38 #include "runtime/atomic.hpp" | 39 #include "runtime/atomic.hpp" |
39 #include "runtime/os.hpp" | 40 #include "runtime/os.hpp" |
40 #ifdef TARGET_ARCH_MODEL_x86_32 | 41 #ifdef TARGET_ARCH_MODEL_x86_32 |
41 # include "adfiles/ad_x86_32.hpp" | 42 # include "adfiles/ad_x86_32.hpp" |
42 #endif | 43 #endif |
55 #ifdef TARGET_ARCH_MODEL_ppc | 56 #ifdef TARGET_ARCH_MODEL_ppc |
56 # include "adfiles/ad_ppc.hpp" | 57 # include "adfiles/ad_ppc.hpp" |
57 #endif | 58 #endif |
58 | 59 |
59 OptoReg::Name OptoReg::c_frame_pointer; | 60 OptoReg::Name OptoReg::c_frame_pointer; |
60 | |
61 | |
62 | |
63 const int Matcher::base2reg[Type::lastype] = { | |
64 Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, | |
65 Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ | |
66 Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ | |
67 0, 0/*abio*/, | |
68 Op_RegP /* Return address */, 0, /* the memories */ | |
69 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, | |
70 0 /*bottom*/ | |
71 }; | |
72 | 61 |
73 const RegMask *Matcher::idealreg2regmask[_last_machine_leaf]; | 62 const RegMask *Matcher::idealreg2regmask[_last_machine_leaf]; |
74 RegMask Matcher::mreg2regmask[_last_Mach_Reg]; | 63 RegMask Matcher::mreg2regmask[_last_Mach_Reg]; |
75 RegMask Matcher::STACK_ONLY_mask; | 64 RegMask Matcher::STACK_ONLY_mask; |
76 RegMask Matcher::c_frame_ptr_mask; | 65 RegMask Matcher::c_frame_ptr_mask; |
105 idealreg2spillmask [Op_RegN] = NULL; | 94 idealreg2spillmask [Op_RegN] = NULL; |
106 idealreg2spillmask [Op_RegL] = NULL; | 95 idealreg2spillmask [Op_RegL] = NULL; |
107 idealreg2spillmask [Op_RegF] = NULL; | 96 idealreg2spillmask [Op_RegF] = NULL; |
108 idealreg2spillmask [Op_RegD] = NULL; | 97 idealreg2spillmask [Op_RegD] = NULL; |
109 idealreg2spillmask [Op_RegP] = NULL; | 98 idealreg2spillmask [Op_RegP] = NULL; |
99 idealreg2spillmask [Op_VecS] = NULL; | |
100 idealreg2spillmask [Op_VecD] = NULL; | |
101 idealreg2spillmask [Op_VecX] = NULL; | |
102 idealreg2spillmask [Op_VecY] = NULL; | |
110 | 103 |
111 idealreg2debugmask [Op_RegI] = NULL; | 104 idealreg2debugmask [Op_RegI] = NULL; |
112 idealreg2debugmask [Op_RegN] = NULL; | 105 idealreg2debugmask [Op_RegN] = NULL; |
113 idealreg2debugmask [Op_RegL] = NULL; | 106 idealreg2debugmask [Op_RegL] = NULL; |
114 idealreg2debugmask [Op_RegF] = NULL; | 107 idealreg2debugmask [Op_RegF] = NULL; |
115 idealreg2debugmask [Op_RegD] = NULL; | 108 idealreg2debugmask [Op_RegD] = NULL; |
116 idealreg2debugmask [Op_RegP] = NULL; | 109 idealreg2debugmask [Op_RegP] = NULL; |
110 idealreg2debugmask [Op_VecS] = NULL; | |
111 idealreg2debugmask [Op_VecD] = NULL; | |
112 idealreg2debugmask [Op_VecX] = NULL; | |
113 idealreg2debugmask [Op_VecY] = NULL; | |
117 | 114 |
118 idealreg2mhdebugmask[Op_RegI] = NULL; | 115 idealreg2mhdebugmask[Op_RegI] = NULL; |
119 idealreg2mhdebugmask[Op_RegN] = NULL; | 116 idealreg2mhdebugmask[Op_RegN] = NULL; |
120 idealreg2mhdebugmask[Op_RegL] = NULL; | 117 idealreg2mhdebugmask[Op_RegL] = NULL; |
121 idealreg2mhdebugmask[Op_RegF] = NULL; | 118 idealreg2mhdebugmask[Op_RegF] = NULL; |
122 idealreg2mhdebugmask[Op_RegD] = NULL; | 119 idealreg2mhdebugmask[Op_RegD] = NULL; |
123 idealreg2mhdebugmask[Op_RegP] = NULL; | 120 idealreg2mhdebugmask[Op_RegP] = NULL; |
121 idealreg2mhdebugmask[Op_VecS] = NULL; | |
122 idealreg2mhdebugmask[Op_VecD] = NULL; | |
123 idealreg2mhdebugmask[Op_VecX] = NULL; | |
124 idealreg2mhdebugmask[Op_VecY] = NULL; | |
124 | 125 |
125 debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node | 126 debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node |
126 } | 127 } |
127 | 128 |
128 //------------------------------warp_incoming_stk_arg------------------------ | 129 //------------------------------warp_incoming_stk_arg------------------------ |
132 if( reg->is_stack() ) { // Stack slot argument? | 133 if( reg->is_stack() ) { // Stack slot argument? |
133 warped = OptoReg::add(_old_SP, reg->reg2stack() ); | 134 warped = OptoReg::add(_old_SP, reg->reg2stack() ); |
134 warped = OptoReg::add(warped, C->out_preserve_stack_slots()); | 135 warped = OptoReg::add(warped, C->out_preserve_stack_slots()); |
135 if( warped >= _in_arg_limit ) | 136 if( warped >= _in_arg_limit ) |
136 _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen | 137 _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen |
137 if (!RegMask::can_represent(warped)) { | 138 if (!RegMask::can_represent_arg(warped)) { |
138 // the compiler cannot represent this method's calling sequence | 139 // the compiler cannot represent this method's calling sequence |
139 C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence"); | 140 C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence"); |
140 return OptoReg::Bad; | 141 return OptoReg::Bad; |
141 } | 142 } |
142 return warped; | 143 return warped; |
300 // Compute highest outgoing stack argument as | 301 // Compute highest outgoing stack argument as |
301 // _new_SP + out_preserve_stack_slots + max(outgoing argument size). | 302 // _new_SP + out_preserve_stack_slots + max(outgoing argument size). |
302 _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots()); | 303 _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots()); |
303 assert( is_even(_out_arg_limit), "out_preserve must be even" ); | 304 assert( is_even(_out_arg_limit), "out_preserve must be even" ); |
304 | 305 |
305 if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) { | 306 if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) { |
306 // the compiler cannot represent this method's calling sequence | 307 // the compiler cannot represent this method's calling sequence |
307 C->record_method_not_compilable("must be able to represent all call arguments in reg mask"); | 308 C->record_method_not_compilable("must be able to represent all call arguments in reg mask"); |
308 } | 309 } |
309 | 310 |
310 if (C->failing()) return; // bailed out on incoming arg failure | 311 if (C->failing()) return; // bailed out on incoming arg failure |
426 // Disallow any debug info in outgoing argument areas by setting the | 427 // Disallow any debug info in outgoing argument areas by setting the |
427 // initial mask accordingly. | 428 // initial mask accordingly. |
428 void Matcher::init_first_stack_mask() { | 429 void Matcher::init_first_stack_mask() { |
429 | 430 |
430 // Allocate storage for spill masks as masks for the appropriate load type. | 431 // Allocate storage for spill masks as masks for the appropriate load type. |
431 RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * 3*6); | 432 RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4)); |
432 | 433 |
433 idealreg2spillmask [Op_RegN] = &rms[0]; | 434 idealreg2spillmask [Op_RegN] = &rms[0]; |
434 idealreg2spillmask [Op_RegI] = &rms[1]; | 435 idealreg2spillmask [Op_RegI] = &rms[1]; |
435 idealreg2spillmask [Op_RegL] = &rms[2]; | 436 idealreg2spillmask [Op_RegL] = &rms[2]; |
436 idealreg2spillmask [Op_RegF] = &rms[3]; | 437 idealreg2spillmask [Op_RegF] = &rms[3]; |
449 idealreg2mhdebugmask[Op_RegL] = &rms[14]; | 450 idealreg2mhdebugmask[Op_RegL] = &rms[14]; |
450 idealreg2mhdebugmask[Op_RegF] = &rms[15]; | 451 idealreg2mhdebugmask[Op_RegF] = &rms[15]; |
451 idealreg2mhdebugmask[Op_RegD] = &rms[16]; | 452 idealreg2mhdebugmask[Op_RegD] = &rms[16]; |
452 idealreg2mhdebugmask[Op_RegP] = &rms[17]; | 453 idealreg2mhdebugmask[Op_RegP] = &rms[17]; |
453 | 454 |
455 idealreg2spillmask [Op_VecS] = &rms[18]; | |
456 idealreg2spillmask [Op_VecD] = &rms[19]; | |
457 idealreg2spillmask [Op_VecX] = &rms[20]; | |
458 idealreg2spillmask [Op_VecY] = &rms[21]; | |
459 | |
454 OptoReg::Name i; | 460 OptoReg::Name i; |
455 | 461 |
456 // At first, start with the empty mask | 462 // At first, start with the empty mask |
457 C->FIRST_STACK_mask().Clear(); | 463 C->FIRST_STACK_mask().Clear(); |
458 | 464 |
460 OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots()); | 466 OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots()); |
461 for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1)) | 467 for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1)) |
462 C->FIRST_STACK_mask().Insert(i); | 468 C->FIRST_STACK_mask().Insert(i); |
463 | 469 |
464 // Add in all bits past the outgoing argument area | 470 // Add in all bits past the outgoing argument area |
465 guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)), | 471 guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)), |
466 "must be able to represent all call arguments in reg mask"); | 472 "must be able to represent all call arguments in reg mask"); |
467 init = _out_arg_limit; | 473 init = _out_arg_limit; |
468 for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1)) | 474 for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1)) |
469 C->FIRST_STACK_mask().Insert(i); | 475 C->FIRST_STACK_mask().Insert(i); |
470 | 476 |
471 // Finally, set the "infinite stack" bit. | 477 // Finally, set the "infinite stack" bit. |
472 C->FIRST_STACK_mask().set_AllStack(); | 478 C->FIRST_STACK_mask().set_AllStack(); |
473 | 479 |
474 // Make spill masks. Registers for their class, plus FIRST_STACK_mask. | 480 // Make spill masks. Registers for their class, plus FIRST_STACK_mask. |
481 RegMask aligned_stack_mask = C->FIRST_STACK_mask(); | |
482 // Keep spill masks aligned. | |
483 aligned_stack_mask.clear_to_pairs(); | |
484 assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); | |
485 | |
486 *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; | |
475 #ifdef _LP64 | 487 #ifdef _LP64 |
476 *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN]; | 488 *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN]; |
477 idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask()); | 489 idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask()); |
490 idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask); | |
491 #else | |
492 idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask()); | |
478 #endif | 493 #endif |
479 *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI]; | 494 *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI]; |
480 idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask()); | 495 idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask()); |
481 *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL]; | 496 *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL]; |
482 idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask()); | 497 idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask); |
483 *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF]; | 498 *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF]; |
484 idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask()); | 499 idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask()); |
485 *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD]; | 500 *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD]; |
486 idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask()); | 501 idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask); |
487 *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; | 502 |
488 idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask()); | 503 if (Matcher::vector_size_supported(T_BYTE,4)) { |
489 | 504 *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS]; |
505 idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask()); | |
506 } | |
507 if (Matcher::vector_size_supported(T_FLOAT,2)) { | |
508 *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD]; | |
509 idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask); | |
510 } | |
511 if (Matcher::vector_size_supported(T_FLOAT,4)) { | |
512 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX); | |
513 assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); | |
514 *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX]; | |
515 idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask); | |
516 } | |
517 if (Matcher::vector_size_supported(T_FLOAT,8)) { | |
518 aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY); | |
519 assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); | |
520 *idealreg2spillmask[Op_VecY] = *idealreg2regmask[Op_VecY]; | |
521 idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask); | |
522 } | |
490 if (UseFPUForSpilling) { | 523 if (UseFPUForSpilling) { |
491 // This mask logic assumes that the spill operations are | 524 // This mask logic assumes that the spill operations are |
492 // symmetric and that the registers involved are the same size. | 525 // symmetric and that the registers involved are the same size. |
493 // On sparc for instance we may have to use 64 bit moves will | 526 // On sparc for instance we may have to use 64 bit moves will |
494 // kill 2 registers when used with F0-F31. | 527 // kill 2 registers when used with F0-F31. |
805 idealreg2regmask[Op_RegI] = &spillI->out_RegMask(); | 838 idealreg2regmask[Op_RegI] = &spillI->out_RegMask(); |
806 idealreg2regmask[Op_RegL] = &spillL->out_RegMask(); | 839 idealreg2regmask[Op_RegL] = &spillL->out_RegMask(); |
807 idealreg2regmask[Op_RegF] = &spillF->out_RegMask(); | 840 idealreg2regmask[Op_RegF] = &spillF->out_RegMask(); |
808 idealreg2regmask[Op_RegD] = &spillD->out_RegMask(); | 841 idealreg2regmask[Op_RegD] = &spillD->out_RegMask(); |
809 idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); | 842 idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); |
843 | |
844 // Vector regmasks. | |
845 if (Matcher::vector_size_supported(T_BYTE,4)) { | |
846 TypeVect::VECTS = TypeVect::make(T_BYTE, 4); | |
847 MachNode *spillVectS = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); | |
848 idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask(); | |
849 } | |
850 if (Matcher::vector_size_supported(T_FLOAT,2)) { | |
851 MachNode *spillVectD = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD)); | |
852 idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask(); | |
853 } | |
854 if (Matcher::vector_size_supported(T_FLOAT,4)) { | |
855 MachNode *spillVectX = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX)); | |
856 idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask(); | |
857 } | |
858 if (Matcher::vector_size_supported(T_FLOAT,8)) { | |
859 MachNode *spillVectY = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY)); | |
860 idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask(); | |
861 } | |
810 } | 862 } |
811 | 863 |
812 #ifdef ASSERT | 864 #ifdef ASSERT |
813 static void match_alias_type(Compile* C, Node* n, Node* m) { | 865 static void match_alias_type(Compile* C, Node* n, Node* m) { |
814 if (!VerifyAliases) return; // do not go looking for trouble by default | 866 if (!VerifyAliases) return; // do not go looking for trouble by default |
1061 // Keep track of the largest numbered stack slot used for an arg. | 1113 // Keep track of the largest numbered stack slot used for an arg. |
1062 // Largest used slot per call-site indicates the amount of stack | 1114 // Largest used slot per call-site indicates the amount of stack |
1063 // that is killed by the call. | 1115 // that is killed by the call. |
1064 if( warped >= out_arg_limit_per_call ) | 1116 if( warped >= out_arg_limit_per_call ) |
1065 out_arg_limit_per_call = OptoReg::add(warped,1); | 1117 out_arg_limit_per_call = OptoReg::add(warped,1); |
1066 if (!RegMask::can_represent(warped)) { | 1118 if (!RegMask::can_represent_arg(warped)) { |
1067 C->record_method_not_compilable_all_tiers("unsupported calling sequence"); | 1119 C->record_method_not_compilable_all_tiers("unsupported calling sequence"); |
1068 return OptoReg::Bad; | 1120 return OptoReg::Bad; |
1069 } | 1121 } |
1070 return warped; | 1122 return warped; |
1071 } | 1123 } |
1249 // Since the max-per-method covers the max-per-call-site and debug info | 1301 // Since the max-per-method covers the max-per-call-site and debug info |
1250 // is excluded on the max-per-method basis, debug info cannot land in | 1302 // is excluded on the max-per-method basis, debug info cannot land in |
1251 // this killed area. | 1303 // this killed area. |
1252 uint r_cnt = mcall->tf()->range()->cnt(); | 1304 uint r_cnt = mcall->tf()->range()->cnt(); |
1253 MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj ); | 1305 MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj ); |
1254 if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) { | 1306 if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) { |
1255 C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence"); | 1307 C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence"); |
1256 } else { | 1308 } else { |
1257 for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++) | 1309 for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++) |
1258 proj->_rout.Insert(OptoReg::Name(i)); | 1310 proj->_rout.Insert(OptoReg::Name(i)); |
1259 } | 1311 } |