Mercurial > hg > truffle
diff src/share/vm/opto/matcher.cpp @ 6179:8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256 bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
author | kvn |
---|---|
date | Fri, 15 Jun 2012 01:25:19 -0700 |
parents | 65149e74c706 |
children | 1d7922586cf6 |
line wrap: on
line diff
--- a/src/share/vm/opto/matcher.cpp Thu Jun 14 14:59:52 2012 -0700 +++ b/src/share/vm/opto/matcher.cpp Fri Jun 15 01:25:19 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,6 +35,7 @@ #include "opto/rootnode.hpp" #include "opto/runtime.hpp" #include "opto/type.hpp" +#include "opto/vectornode.hpp" #include "runtime/atomic.hpp" #include "runtime/os.hpp" #ifdef TARGET_ARCH_MODEL_x86_32 @@ -58,18 +59,6 @@ OptoReg::Name OptoReg::c_frame_pointer; - - -const int Matcher::base2reg[Type::lastype] = { - Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, - Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ - Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ - 0, 0/*abio*/, - Op_RegP /* Return address */, 0, /* the memories */ - Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, - 0 /*bottom*/ -}; - const RegMask *Matcher::idealreg2regmask[_last_machine_leaf]; RegMask Matcher::mreg2regmask[_last_Mach_Reg]; RegMask Matcher::STACK_ONLY_mask; @@ -107,6 +96,10 @@ idealreg2spillmask [Op_RegF] = NULL; idealreg2spillmask [Op_RegD] = NULL; idealreg2spillmask [Op_RegP] = NULL; + idealreg2spillmask [Op_VecS] = NULL; + idealreg2spillmask [Op_VecD] = NULL; + idealreg2spillmask [Op_VecX] = NULL; + idealreg2spillmask [Op_VecY] = NULL; idealreg2debugmask [Op_RegI] = NULL; idealreg2debugmask [Op_RegN] = NULL; @@ -114,6 +107,10 @@ idealreg2debugmask [Op_RegF] = NULL; idealreg2debugmask [Op_RegD] = NULL; idealreg2debugmask [Op_RegP] = NULL; + idealreg2debugmask [Op_VecS] = NULL; + idealreg2debugmask [Op_VecD] = NULL; + idealreg2debugmask [Op_VecX] = NULL; + idealreg2debugmask [Op_VecY] = NULL; idealreg2mhdebugmask[Op_RegI] = NULL; idealreg2mhdebugmask[Op_RegN] = NULL; @@ 
-121,6 +118,10 @@ idealreg2mhdebugmask[Op_RegF] = NULL; idealreg2mhdebugmask[Op_RegD] = NULL; idealreg2mhdebugmask[Op_RegP] = NULL; + idealreg2mhdebugmask[Op_VecS] = NULL; + idealreg2mhdebugmask[Op_VecD] = NULL; + idealreg2mhdebugmask[Op_VecX] = NULL; + idealreg2mhdebugmask[Op_VecY] = NULL; debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node } @@ -134,7 +135,7 @@ warped = OptoReg::add(warped, C->out_preserve_stack_slots()); if( warped >= _in_arg_limit ) _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen - if (!RegMask::can_represent(warped)) { + if (!RegMask::can_represent_arg(warped)) { // the compiler cannot represent this method's calling sequence C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence"); return OptoReg::Bad; @@ -302,7 +303,7 @@ _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots()); assert( is_even(_out_arg_limit), "out_preserve must be even" ); - if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) { + if (!RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1))) { // the compiler cannot represent this method's calling sequence C->record_method_not_compilable("must be able to represent all call arguments in reg mask"); } @@ -428,7 +429,7 @@ void Matcher::init_first_stack_mask() { // Allocate storage for spill masks as masks for the appropriate load type. 
- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * 3*6); + RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+4)); idealreg2spillmask [Op_RegN] = &rms[0]; idealreg2spillmask [Op_RegI] = &rms[1]; @@ -451,6 +452,11 @@ idealreg2mhdebugmask[Op_RegD] = &rms[16]; idealreg2mhdebugmask[Op_RegP] = &rms[17]; + idealreg2spillmask [Op_VecS] = &rms[18]; + idealreg2spillmask [Op_VecD] = &rms[19]; + idealreg2spillmask [Op_VecX] = &rms[20]; + idealreg2spillmask [Op_VecY] = &rms[21]; + OptoReg::Name i; // At first, start with the empty mask @@ -462,7 +468,7 @@ C->FIRST_STACK_mask().Insert(i); // Add in all bits past the outgoing argument area - guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)), + guarantee(RegMask::can_represent_arg(OptoReg::add(_out_arg_limit,-1)), "must be able to represent all call arguments in reg mask"); init = _out_arg_limit; for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1)) @@ -472,21 +478,48 @@ C->FIRST_STACK_mask().set_AllStack(); // Make spill masks. Registers for their class, plus FIRST_STACK_mask. + RegMask aligned_stack_mask = C->FIRST_STACK_mask(); + // Keep spill masks aligned. 
+ aligned_stack_mask.clear_to_pairs(); + assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); + + *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; #ifdef _LP64 *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN]; idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask()); + idealreg2spillmask[Op_RegP]->OR(aligned_stack_mask); +#else + idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask()); #endif *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI]; idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask()); *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL]; - idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask()); + idealreg2spillmask[Op_RegL]->OR(aligned_stack_mask); *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF]; idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask()); *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD]; - idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask()); - *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; - idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask()); + idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask); + if (Matcher::vector_size_supported(T_BYTE,4)) { + *idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS]; + idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask()); + } + if (Matcher::vector_size_supported(T_FLOAT,2)) { + *idealreg2spillmask[Op_VecD] = *idealreg2regmask[Op_VecD]; + idealreg2spillmask[Op_VecD]->OR(aligned_stack_mask); + } + if (Matcher::vector_size_supported(T_FLOAT,4)) { + aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecX); + assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); + *idealreg2spillmask[Op_VecX] = *idealreg2regmask[Op_VecX]; + idealreg2spillmask[Op_VecX]->OR(aligned_stack_mask); + } + if (Matcher::vector_size_supported(T_FLOAT,8)) { + aligned_stack_mask.clear_to_sets(RegMask::SlotsPerVecY); + assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); + *idealreg2spillmask[Op_VecY] = 
*idealreg2regmask[Op_VecY]; + idealreg2spillmask[Op_VecY]->OR(aligned_stack_mask); + } if (UseFPUForSpilling) { // This mask logic assumes that the spill operations are // symmetric and that the registers involved are the same size. @@ -807,6 +840,25 @@ idealreg2regmask[Op_RegF] = &spillF->out_RegMask(); idealreg2regmask[Op_RegD] = &spillD->out_RegMask(); idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); + + // Vector regmasks. + if (Matcher::vector_size_supported(T_BYTE,4)) { + TypeVect::VECTS = TypeVect::make(T_BYTE, 4); + MachNode *spillVectS = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); + idealreg2regmask[Op_VecS] = &spillVectS->out_RegMask(); + } + if (Matcher::vector_size_supported(T_FLOAT,2)) { + MachNode *spillVectD = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTD)); + idealreg2regmask[Op_VecD] = &spillVectD->out_RegMask(); + } + if (Matcher::vector_size_supported(T_FLOAT,4)) { + MachNode *spillVectX = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTX)); + idealreg2regmask[Op_VecX] = &spillVectX->out_RegMask(); + } + if (Matcher::vector_size_supported(T_FLOAT,8)) { + MachNode *spillVectY = match_tree(new (C, 3) LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTY)); + idealreg2regmask[Op_VecY] = &spillVectY->out_RegMask(); + } } #ifdef ASSERT @@ -1063,7 +1115,7 @@ // that is killed by the call. if( warped >= out_arg_limit_per_call ) out_arg_limit_per_call = OptoReg::add(warped,1); - if (!RegMask::can_represent(warped)) { + if (!RegMask::can_represent_arg(warped)) { C->record_method_not_compilable_all_tiers("unsupported calling sequence"); return OptoReg::Bad; } @@ -1251,7 +1303,7 @@ // this killed area. 
uint r_cnt = mcall->tf()->range()->cnt(); MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj ); - if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) { + if (!RegMask::can_represent_arg(OptoReg::Name(out_arg_limit_per_call-1))) { C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence"); } else { for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)