Mercurial > hg > truffle
annotate src/share/vm/opto/postaloc.cpp @ 4710:41406797186b
7113012: G1: rename not-fully-young GCs as "mixed"
Summary: Renamed partially-young GCs as mixed and fully-young GCs as young. Change all external output that includes those terms (GC log and GC ergo log) as well as any comments, fields, methods, etc. The changeset also includes very minor code tidying up (added some curly brackets).
Reviewed-by: johnc, brutisso
author | tonyp |
---|---|
date | Fri, 16 Dec 2011 02:14:27 -0500 |
parents | 2209834ccb59 |
children | 5da7201222d5 |
rev | line source |
---|---|
0 | 1 /* |
1972 | 2 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
948
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
948
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
948
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "memory/allocation.inline.hpp" | |
27 #include "opto/chaitin.hpp" | |
28 #include "opto/machnode.hpp" | |
0 | 29 |
30 // see if this register kind does not requires two registers | |
31 static bool is_single_register(uint x) { | |
32 #ifdef _LP64 | |
33 return (x != Op_RegD && x != Op_RegL && x != Op_RegP); | |
34 #else | |
35 return (x != Op_RegD && x != Op_RegL); | |
36 #endif | |
37 } | |
38 | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
39 //---------------------------may_be_copy_of_callee----------------------------- |
0 | 40 // Check to see if we can possibly be a copy of a callee-save value. |
41 bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const { | |
42 // Short circuit if there are no callee save registers | |
43 if (_matcher.number_of_saved_registers() == 0) return false; | |
44 | |
45 // Expect only a spill-down and reload on exit for callee-save spills. | |
46 // Chains of copies cannot be deep. | |
47 // 5008997 - This is wishful thinking. Register allocator seems to | |
48 // be splitting live ranges for callee save registers to such | |
49 // an extent that in large methods the chains can be very long | |
50 // (50+). The conservative answer is to return true if we don't | |
605 | 51 // know as this prevents optimizations from occurring. |
0 | 52 |
53 const int limit = 60; | |
54 int i; | |
55 for( i=0; i < limit; i++ ) { | |
56 if( def->is_Proj() && def->in(0)->is_Start() && | |
57 _matcher.is_save_on_entry(lrgs(n2lidx(def)).reg()) ) | |
58 return true; // Direct use of callee-save proj | |
59 if( def->is_Copy() ) // Copies carry value through | |
60 def = def->in(def->is_Copy()); | |
61 else if( def->is_Phi() ) // Phis can merge it from any direction | |
62 def = def->in(1); | |
63 else | |
64 break; | |
65 guarantee(def != NULL, "must not resurrect dead copy"); | |
66 } | |
67 // If we reached the end and didn't find a callee save proj | |
68 // then this may be a callee save proj so we return true | |
69 // as the conservative answer. If we didn't reach then end | |
70 // we must have discovered that it was not a callee save | |
71 // else we would have returned. | |
72 return i == limit; | |
73 } | |
74 | |
3934
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
75 //------------------------------yank----------------------------------- |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
76 // Helper function for yank_if_dead |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
77 int PhaseChaitin::yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) { |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
78 int blk_adjust=0; |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
79 Block *oldb = _cfg._bbs[old->_idx]; |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
80 oldb->find_remove(old); |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
81 // Count 1 if deleting an instruction from the current block |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
82 if( oldb == current_block ) blk_adjust++; |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
83 _cfg._bbs.map(old->_idx,NULL); |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
84 OptoReg::Name old_reg = lrgs(n2lidx(old)).reg(); |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
85 if( regnd && (*regnd)[old_reg]==old ) { // Instruction is currently available? |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
86 value->map(old_reg,NULL); // Yank from value/regnd maps |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
87 regnd->map(old_reg,NULL); // This register's value is now unknown |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
88 } |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
89 return blk_adjust; |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
90 } |
0 | 91 |
92 //------------------------------yank_if_dead----------------------------------- | |
93 // Removed an edge from 'old'. Yank if dead. Return adjustment counts to | |
94 // iterators in the current block. | |
95 int PhaseChaitin::yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) { | |
96 int blk_adjust=0; | |
97 while (old->outcnt() == 0 && old != C->top()) { | |
3934
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
98 blk_adjust += yank(old, current_block, value, regnd); |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
99 |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
100 Node *tmp = NULL; |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
101 for (uint i = 1; i < old->req(); i++) { |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
102 if (old->in(i)->is_MachTemp()) { |
3941
2209834ccb59
7089632: assert(machtmp->outcnt() == 1) failed: expected for a MachTemp
kvn
parents:
3934
diff
changeset
|
103 // handle TEMP inputs |
3934
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
104 Node* machtmp = old->in(i); |
3941
2209834ccb59
7089632: assert(machtmp->outcnt() == 1) failed: expected for a MachTemp
kvn
parents:
3934
diff
changeset
|
105 if (machtmp->outcnt() == 1) { |
2209834ccb59
7089632: assert(machtmp->outcnt() == 1) failed: expected for a MachTemp
kvn
parents:
3934
diff
changeset
|
106 assert(machtmp->unique_out() == old, "sanity"); |
2209834ccb59
7089632: assert(machtmp->outcnt() == 1) failed: expected for a MachTemp
kvn
parents:
3934
diff
changeset
|
107 blk_adjust += yank(machtmp, current_block, value, regnd); |
2209834ccb59
7089632: assert(machtmp->outcnt() == 1) failed: expected for a MachTemp
kvn
parents:
3934
diff
changeset
|
108 machtmp->disconnect_inputs(NULL); |
2209834ccb59
7089632: assert(machtmp->outcnt() == 1) failed: expected for a MachTemp
kvn
parents:
3934
diff
changeset
|
109 } |
3934
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
110 } else { |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
111 assert(tmp == NULL, "can't handle more non MachTemp inputs"); |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
112 tmp = old->in(i); |
8f47d8870d9a
7087453: PhaseChaitin::yank_if_dead() should handle MachTemp inputs
roland
parents:
2008
diff
changeset
|
113 } |
0 | 114 } |
115 old->disconnect_inputs(NULL); | |
116 if( !tmp ) break; | |
117 old = tmp; | |
118 } | |
119 return blk_adjust; | |
120 } | |
121 | |
122 //------------------------------use_prior_register----------------------------- | |
123 // Use the prior value instead of the current value, in an effort to make | |
124 // the current value go dead. Return block iterator adjustment, in case | |
125 // we yank some instructions from this block. | |
126 int PhaseChaitin::use_prior_register( Node *n, uint idx, Node *def, Block *current_block, Node_List &value, Node_List ®nd ) { | |
127 // No effect? | |
128 if( def == n->in(idx) ) return 0; | |
129 // Def is currently dead and can be removed? Do not resurrect | |
130 if( def->outcnt() == 0 ) return 0; | |
131 | |
132 // Not every pair of physical registers are assignment compatible, | |
133 // e.g. on sparc floating point registers are not assignable to integer | |
134 // registers. | |
135 const LRG &def_lrg = lrgs(n2lidx(def)); | |
136 OptoReg::Name def_reg = def_lrg.reg(); | |
137 const RegMask &use_mask = n->in_RegMask(idx); | |
138 bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0) | |
139 : (use_mask.is_AllStack() != 0)); | |
140 // Check for a copy to or from a misaligned pair. | |
141 can_use = can_use && !use_mask.is_misaligned_Pair() && !def_lrg.mask().is_misaligned_Pair(); | |
142 | |
143 if (!can_use) | |
144 return 0; | |
145 | |
146 // Capture the old def in case it goes dead... | |
147 Node *old = n->in(idx); | |
148 | |
149 // Save-on-call copies can only be elided if the entire copy chain can go | |
150 // away, lest we get the same callee-save value alive in 2 locations at | |
151 // once. We check for the obvious trivial case here. Although it can | |
152 // sometimes be elided with cooperation outside our scope, here we will just | |
153 // miss the opportunity. :-( | |
154 if( may_be_copy_of_callee(def) ) { | |
155 if( old->outcnt() > 1 ) return 0; // We're the not last user | |
156 int idx = old->is_Copy(); | |
157 assert( idx, "chain of copies being removed" ); | |
158 Node *old2 = old->in(idx); // Chain of copies | |
159 if( old2->outcnt() > 1 ) return 0; // old is not the last user | |
160 int idx2 = old2->is_Copy(); | |
161 if( !idx2 ) return 0; // Not a chain of 2 copies | |
162 if( def != old2->in(idx2) ) return 0; // Chain of exactly 2 copies | |
163 } | |
164 | |
165 // Use the new def | |
166 n->set_req(idx,def); | |
167 _post_alloc++; | |
168 | |
169 // Is old def now dead? We successfully yanked a copy? | |
170 return yank_if_dead(old,current_block,&value,®nd); | |
171 } | |
172 | |
173 | |
174 //------------------------------skip_copies------------------------------------ | |
175 // Skip through any number of copies (that don't mod oop-i-ness) | |
176 Node *PhaseChaitin::skip_copies( Node *c ) { | |
177 int idx = c->is_Copy(); | |
178 uint is_oop = lrgs(n2lidx(c))._is_oop; | |
179 while (idx != 0) { | |
180 guarantee(c->in(idx) != NULL, "must not resurrect dead copy"); | |
181 if (lrgs(n2lidx(c->in(idx)))._is_oop != is_oop) | |
182 break; // casting copy, not the same value | |
183 c = c->in(idx); | |
184 idx = c->is_Copy(); | |
185 } | |
186 return c; | |
187 } | |
188 | |
189 //------------------------------elide_copy------------------------------------- | |
190 // Remove (bypass) copies along Node n, edge k. | |
191 int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List ®nd, bool can_change_regs ) { | |
192 int blk_adjust = 0; | |
193 | |
194 uint nk_idx = n2lidx(n->in(k)); | |
195 OptoReg::Name nk_reg = lrgs(nk_idx ).reg(); | |
196 | |
197 // Remove obvious same-register copies | |
198 Node *x = n->in(k); | |
199 int idx; | |
200 while( (idx=x->is_Copy()) != 0 ) { | |
201 Node *copy = x->in(idx); | |
202 guarantee(copy != NULL, "must not resurrect dead copy"); | |
203 if( lrgs(n2lidx(copy)).reg() != nk_reg ) break; | |
204 blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd); | |
205 if( n->in(k) != copy ) break; // Failed for some cutout? | |
206 x = copy; // Progress, try again | |
207 } | |
208 | |
209 // Phis and 2-address instructions cannot change registers so easily - their | |
210 // outputs must match their input. | |
211 if( !can_change_regs ) | |
212 return blk_adjust; // Only check stupid copies! | |
213 | |
214 // Loop backedges won't have a value-mapping yet | |
215 if( &value == NULL ) return blk_adjust; | |
216 | |
217 // Skip through all copies to the _value_ being used. Do not change from | |
218 // int to pointer. This attempts to jump through a chain of copies, where | |
219 // intermediate copies might be illegal, i.e., value is stored down to stack | |
220 // then reloaded BUT survives in a register the whole way. | |
221 Node *val = skip_copies(n->in(k)); | |
222 | |
2008 | 223 if (val == x && nk_idx != 0 && |
224 regnd[nk_reg] != NULL && regnd[nk_reg] != x && | |
225 n2lidx(x) == n2lidx(regnd[nk_reg])) { | |
226 // When rematerialzing nodes and stretching lifetimes, the | |
227 // allocator will reuse the original def for multidef LRG instead | |
228 // of the current reaching def because it can't know it's safe to | |
229 // do so. After allocation completes if they are in the same LRG | |
230 // then it should use the current reaching def instead. | |
231 n->set_req(k, regnd[nk_reg]); | |
232 blk_adjust += yank_if_dead(val, current_block, &value, ®nd); | |
233 val = skip_copies(n->in(k)); | |
234 } | |
235 | |
0 | 236 if( val == x ) return blk_adjust; // No progress? |
237 | |
238 bool single = is_single_register(val->ideal_reg()); | |
239 uint val_idx = n2lidx(val); | |
240 OptoReg::Name val_reg = lrgs(val_idx).reg(); | |
241 | |
242 // See if it happens to already be in the correct register! | |
243 // (either Phi's direct register, or the common case of the name | |
244 // never-clobbered original-def register) | |
245 if( value[val_reg] == val && | |
246 // Doubles check both halves | |
247 ( single || value[val_reg-1] == val ) ) { | |
248 blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd); | |
249 if( n->in(k) == regnd[val_reg] ) // Success! Quit trying | |
250 return blk_adjust; | |
251 } | |
252 | |
253 // See if we can skip the copy by changing registers. Don't change from | |
254 // using a register to using the stack unless we know we can remove a | |
255 // copy-load. Otherwise we might end up making a pile of Intel cisc-spill | |
256 // ops reading from memory instead of just loading once and using the | |
257 // register. | |
258 | |
259 // Also handle duplicate copies here. | |
260 const Type *t = val->is_Con() ? val->bottom_type() : NULL; | |
261 | |
262 // Scan all registers to see if this value is around already | |
263 for( uint reg = 0; reg < (uint)_max_reg; reg++ ) { | |
400
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
264 if (reg == (uint)nk_reg) { |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
265 // Found ourselves so check if there is only one user of this |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
266 // copy and keep on searching for a better copy if so. |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
267 bool ignore_self = true; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
268 x = n->in(k); |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
269 DUIterator_Fast imax, i = x->fast_outs(imax); |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
270 Node* first = x->fast_out(i); i++; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
271 while (i < imax && ignore_self) { |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
272 Node* use = x->fast_out(i); i++; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
273 if (use != first) ignore_self = false; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
274 } |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
275 if (ignore_self) continue; |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
276 } |
cc80376deb0c
6667595: Set probability FAIR for pre-, post- loops and ALWAYS for main loop
kvn
parents:
196
diff
changeset
|
277 |
0 | 278 Node *vv = value[reg]; |
279 if( !single ) { // Doubles check for aligned-adjacent pair | |
280 if( (reg&1)==0 ) continue; // Wrong half of a pair | |
281 if( vv != value[reg-1] ) continue; // Not a complete pair | |
282 } | |
283 if( vv == val || // Got a direct hit? | |
284 (t && vv && vv->bottom_type() == t && vv->is_Mach() && | |
285 vv->as_Mach()->rule() == val->as_Mach()->rule()) ) { // Or same constant? | |
286 assert( !n->is_Phi(), "cannot change registers at a Phi so easily" ); | |
287 if( OptoReg::is_stack(nk_reg) || // CISC-loading from stack OR | |
288 OptoReg::is_reg(reg) || // turning into a register use OR | |
289 regnd[reg]->outcnt()==1 ) { // last use of a spill-load turns into a CISC use | |
290 blk_adjust += use_prior_register(n,k,regnd[reg],current_block,value,regnd); | |
291 if( n->in(k) == regnd[reg] ) // Success! Quit trying | |
292 return blk_adjust; | |
293 } // End of if not degrading to a stack | |
294 } // End of if found value in another register | |
295 } // End of scan all machine registers | |
296 return blk_adjust; | |
297 } | |
298 | |
299 | |
300 // | |
301 // Check if nreg already contains the constant value val. Normal copy | |
302 // elimination doesn't doesn't work on constants because multiple | |
303 // nodes can represent the same constant so the type and rule of the | |
304 // MachNode must be checked to ensure equivalence. | |
305 // | |
70
b683f557224b
6661247: Internal bug in 32-bit HotSpot optimizer while bit manipulations
never
parents:
0
diff
changeset
|
306 bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Node* n, |
b683f557224b
6661247: Internal bug in 32-bit HotSpot optimizer while bit manipulations
never
parents:
0
diff
changeset
|
307 Block *current_block, |
0 | 308 Node_List& value, Node_List& regnd, |
309 OptoReg::Name nreg, OptoReg::Name nreg2) { | |
310 if (value[nreg] != val && val->is_Con() && | |
311 value[nreg] != NULL && value[nreg]->is_Con() && | |
312 (nreg2 == OptoReg::Bad || value[nreg] == value[nreg2]) && | |
313 value[nreg]->bottom_type() == val->bottom_type() && | |
314 value[nreg]->as_Mach()->rule() == val->as_Mach()->rule()) { | |
315 // This code assumes that two MachNodes representing constants | |
316 // which have the same rule and the same bottom type will produce | |
317 // identical effects into a register. This seems like it must be | |
318 // objectively true unless there are hidden inputs to the nodes | |
319 // but if that were to change this code would need to updated. | |
320 // Since they are equivalent the second one if redundant and can | |
321 // be removed. | |
322 // | |
70
b683f557224b
6661247: Internal bug in 32-bit HotSpot optimizer while bit manipulations
never
parents:
0
diff
changeset
|
323 // n will be replaced with the old value but n might have |
0 | 324 // kills projections associated with it so remove them now so that |
605 | 325 // yank_if_dead will be able to eliminate the copy once the uses |
0 | 326 // have been transferred to the old[value]. |
70
b683f557224b
6661247: Internal bug in 32-bit HotSpot optimizer while bit manipulations
never
parents:
0
diff
changeset
|
327 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
b683f557224b
6661247: Internal bug in 32-bit HotSpot optimizer while bit manipulations
never
parents:
0
diff
changeset
|
328 Node* use = n->fast_out(i); |
0 | 329 if (use->is_Proj() && use->outcnt() == 0) { |
330 // Kill projections have no users and one input | |
331 use->set_req(0, C->top()); | |
332 yank_if_dead(use, current_block, &value, ®nd); | |
333 --i; --imax; | |
334 } | |
335 } | |
336 _post_alloc++; | |
337 return true; | |
338 } | |
339 return false; | |
340 } | |
341 | |
342 | |
343 //------------------------------post_allocate_copy_removal--------------------- | |
344 // Post-Allocation peephole copy removal. We do this in 1 pass over the | |
345 // basic blocks. We maintain a mapping of registers to Nodes (an array of | |
346 // Nodes indexed by machine register or stack slot number). NULL means that a | |
347 // register is not mapped to any Node. We can (want to have!) have several | |
348 // registers map to the same Node. We walk forward over the instructions | |
349 // updating the mapping as we go. At merge points we force a NULL if we have | |
350 // to merge 2 different Nodes into the same register. Phi functions will give | |
351 // us a new Node if there is a proper value merging. Since the blocks are | |
352 // arranged in some RPO, we will visit all parent blocks before visiting any | |
353 // successor blocks (except at loops). | |
354 // | |
355 // If we find a Copy we look to see if the Copy's source register is a stack | |
356 // slot and that value has already been loaded into some machine register; if | |
357 // so we use machine register directly. This turns a Load into a reg-reg | |
358 // Move. We also look for reloads of identical constants. | |
359 // | |
360 // When we see a use from a reg-reg Copy, we will attempt to use the copy's | |
361 // source directly and make the copy go dead. | |
362 void PhaseChaitin::post_allocate_copy_removal() { | |
363 NOT_PRODUCT( Compile::TracePhase t3("postAllocCopyRemoval", &_t_postAllocCopyRemoval, TimeCompiler); ) | |
364 ResourceMark rm; | |
365 | |
366 // Need a mapping from basic block Node_Lists. We need a Node_List to | |
367 // map from register number to value-producing Node. | |
368 Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1); | |
369 memset( blk2value, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) ); | |
370 // Need a mapping from basic block Node_Lists. We need a Node_List to | |
371 // map from register number to register-defining Node. | |
372 Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1); | |
373 memset( blk2regnd, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) ); | |
374 | |
375 // We keep unused Node_Lists on a free_list to avoid wasting | |
376 // memory. | |
377 GrowableArray<Node_List*> free_list = GrowableArray<Node_List*>(16); | |
378 | |
379 // For all blocks | |
380 for( uint i = 0; i < _cfg._num_blocks; i++ ) { | |
381 uint j; | |
382 Block *b = _cfg._blocks[i]; | |
383 | |
384 // Count of Phis in block | |
385 uint phi_dex; | |
386 for( phi_dex = 1; phi_dex < b->_nodes.size(); phi_dex++ ) { | |
387 Node *phi = b->_nodes[phi_dex]; | |
388 if( !phi->is_Phi() ) | |
389 break; | |
390 } | |
391 | |
392 // If any predecessor has not been visited, we do not know the state | |
393 // of registers at the start. Check for this, while updating copies | |
394 // along Phi input edges | |
395 bool missing_some_inputs = false; | |
396 Block *freed = NULL; | |
397 for( j = 1; j < b->num_preds(); j++ ) { | |
398 Block *pb = _cfg._bbs[b->pred(j)->_idx]; | |
399 // Remove copies along phi edges | |
400 for( uint k=1; k<phi_dex; k++ ) | |
401 elide_copy( b->_nodes[k], j, b, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false ); | |
402 if( blk2value[pb->_pre_order] ) { // Have a mapping on this edge? | |
403 // See if this predecessor's mappings have been used by everybody | |
404 // who wants them. If so, free 'em. | |
405 uint k; | |
406 for( k=0; k<pb->_num_succs; k++ ) { | |
407 Block *pbsucc = pb->_succs[k]; | |
408 if( !blk2value[pbsucc->_pre_order] && pbsucc != b ) | |
409 break; // Found a future user | |
410 } | |
411 if( k >= pb->_num_succs ) { // No more uses, free! | |
412 freed = pb; // Record last block freed | |
413 free_list.push(blk2value[pb->_pre_order]); | |
414 free_list.push(blk2regnd[pb->_pre_order]); | |
415 } | |
416 } else { // This block has unvisited (loopback) inputs | |
417 missing_some_inputs = true; | |
418 } | |
419 } | |
420 | |
421 | |
422 // Extract Node_List mappings. If 'freed' is non-zero, we just popped | |
423 // 'freed's blocks off the list | |
424 Node_List ®nd = *(free_list.is_empty() ? new Node_List() : free_list.pop()); | |
425 Node_List &value = *(free_list.is_empty() ? new Node_List() : free_list.pop()); | |
426 assert( !freed || blk2value[freed->_pre_order] == &value, "" ); | |
427 value.map(_max_reg,NULL); | |
428 regnd.map(_max_reg,NULL); | |
429 // Set mappings as OUR mappings | |
430 blk2value[b->_pre_order] = &value; | |
431 blk2regnd[b->_pre_order] = ®nd; | |
432 | |
433 // Initialize value & regnd for this block | |
434 if( missing_some_inputs ) { | |
435 // Some predecessor has not yet been visited; zap map to empty | |
436 for( uint k = 0; k < (uint)_max_reg; k++ ) { | |
437 value.map(k,NULL); | |
438 regnd.map(k,NULL); | |
439 } | |
440 } else { | |
441 if( !freed ) { // Didn't get a freebie prior block | |
442 // Must clone some data | |
443 freed = _cfg._bbs[b->pred(1)->_idx]; | |
444 Node_List &f_value = *blk2value[freed->_pre_order]; | |
445 Node_List &f_regnd = *blk2regnd[freed->_pre_order]; | |
446 for( uint k = 0; k < (uint)_max_reg; k++ ) { | |
447 value.map(k,f_value[k]); | |
448 regnd.map(k,f_regnd[k]); | |
449 } | |
450 } | |
451 // Merge all inputs together, setting to NULL any conflicts. | |
452 for( j = 1; j < b->num_preds(); j++ ) { | |
453 Block *pb = _cfg._bbs[b->pred(j)->_idx]; | |
454 if( pb == freed ) continue; // Did self already via freelist | |
455 Node_List &p_regnd = *blk2regnd[pb->_pre_order]; | |
456 for( uint k = 0; k < (uint)_max_reg; k++ ) { | |
457 if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs? | |
458 value.map(k,NULL); // Then no value handy | |
459 regnd.map(k,NULL); | |
460 } | |
461 } | |
462 } | |
463 } | |
464 | |
465 // For all Phi's | |
466 for( j = 1; j < phi_dex; j++ ) { | |
467 uint k; | |
468 Node *phi = b->_nodes[j]; | |
469 uint pidx = n2lidx(phi); | |
470 OptoReg::Name preg = lrgs(n2lidx(phi)).reg(); | |
471 | |
472 // Remove copies remaining on edges. Check for junk phi. | |
473 Node *u = NULL; | |
474 for( k=1; k<phi->req(); k++ ) { | |
475 Node *x = phi->in(k); | |
476 if( phi != x && u != x ) // Found a different input | |
477 u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input | |
478 } | |
479 if( u != NodeSentinel ) { // Junk Phi. Remove | |
480 b->_nodes.remove(j--); phi_dex--; | |
481 _cfg._bbs.map(phi->_idx,NULL); | |
482 phi->replace_by(u); | |
483 phi->disconnect_inputs(NULL); | |
484 continue; | |
485 } | |
486 // Note that if value[pidx] exists, then we merged no new values here | |
487 // and the phi is useless. This can happen even with the above phi | |
488 // removal for complex flows. I cannot keep the better known value here | |
489 // because locally the phi appears to define a new merged value. If I | |
490 // keep the better value then a copy of the phi, being unable to use the | |
491 // global flow analysis, can't "peek through" the phi to the original | |
492 // reaching value and so will act like it's defining a new value. This | |
493 // can lead to situations where some uses are from the old and some from | |
494 // the new values. Not illegal by itself but throws the over-strong | |
495 // assert in scheduling. | |
496 if( pidx ) { | |
497 value.map(preg,phi); | |
498 regnd.map(preg,phi); | |
499 OptoReg::Name preg_lo = OptoReg::add(preg,-1); | |
500 if( !is_single_register(phi->ideal_reg()) ) { | |
501 value.map(preg_lo,phi); | |
502 regnd.map(preg_lo,phi); | |
503 } | |
504 } | |
505 } | |
506 | |
507 // For all remaining instructions | |
508 for( j = phi_dex; j < b->_nodes.size(); j++ ) { | |
509 Node *n = b->_nodes[j]; | |
510 | |
511 if( n->outcnt() == 0 && // Dead? | |
512 n != C->top() && // (ignore TOP, it has no du info) | |
513 !n->is_Proj() ) { // fat-proj kills | |
514 j -= yank_if_dead(n,b,&value,®nd); | |
515 continue; | |
516 } | |
517 | |
518 // Improve reaching-def info. Occasionally post-alloc's liveness gives | |
519 // up (at loop backedges, because we aren't doing a full flow pass). | |
520 // The presence of a live use essentially asserts that the use's def is | |
521 // alive and well at the use (or else the allocator fubar'd). Take | |
522 // advantage of this info to set a reaching def for the use-reg. | |
523 uint k; | |
524 for( k = 1; k < n->req(); k++ ) { | |
525 Node *def = n->in(k); // n->in(k) is a USE; def is the DEF for this USE | |
526 guarantee(def != NULL, "no disconnected nodes at this point"); | |
527 uint useidx = n2lidx(def); // useidx is the live range index for this USE | |
528 | |
529 if( useidx ) { | |
530 OptoReg::Name ureg = lrgs(useidx).reg(); | |
531 if( !value[ureg] ) { | |
532 int idx; // Skip occasional useless copy | |
533 while( (idx=def->is_Copy()) != 0 && | |
534 def->in(idx) != NULL && // NULL should not happen | |
535 ureg == lrgs(n2lidx(def->in(idx))).reg() ) | |
536 def = def->in(idx); | |
537 Node *valdef = skip_copies(def); // tighten up val through non-useless copies | |
538 value.map(ureg,valdef); // record improved reaching-def info | |
539 regnd.map(ureg, def); | |
540 // Record other half of doubles | |
541 OptoReg::Name ureg_lo = OptoReg::add(ureg,-1); | |
542 if( !is_single_register(def->ideal_reg()) && | |
543 ( !RegMask::can_represent(ureg_lo) || | |
544 lrgs(useidx).mask().Member(ureg_lo) ) && // Nearly always adjacent | |
545 !value[ureg_lo] ) { | |
546 value.map(ureg_lo,valdef); // record improved reaching-def info | |
547 regnd.map(ureg_lo, def); | |
548 } | |
549 } | |
550 } | |
551 } | |
552 | |
553 const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0; | |
554 | |
555 // Remove copies along input edges | |
556 for( k = 1; k < n->req(); k++ ) | |
557 j -= elide_copy( n, k, b, value, regnd, two_adr!=k ); | |
558 | |
559 // Unallocated Nodes define no registers | |
560 uint lidx = n2lidx(n); | |
561 if( !lidx ) continue; | |
562 | |
563 // Update the register defined by this instruction | |
564 OptoReg::Name nreg = lrgs(lidx).reg(); | |
565 // Skip through all copies to the _value_ being defined. | |
566 // Do not change from int to pointer | |
567 Node *val = skip_copies(n); | |
568 | |
923 | 569 // Clear out a dead definition before starting so that the |
570 // elimination code doesn't have to guard against it. The | |
571 // definition could in fact be a kill projection with a count of | |
572 // 0 which is safe but since those are uninteresting for copy | |
573 // elimination just delete them as well. | |
574 if (regnd[nreg] != NULL && regnd[nreg]->outcnt() == 0) { | |
575 regnd.map(nreg, NULL); | |
576 value.map(nreg, NULL); | |
577 } | |
578 | |
0 | 579 uint n_ideal_reg = n->ideal_reg(); |
580 if( is_single_register(n_ideal_reg) ) { | |
581 // If Node 'n' does not change the value mapped by the register, | |
582 // then 'n' is a useless copy. Do not update the register->node | |
583 // mapping so 'n' will go dead. | |
584 if( value[nreg] != val ) { | |
70
b683f557224b
6661247: Internal bug in 32-bit HotSpot optimizer while bit manipulations
never
parents:
0
diff
changeset
|
585 if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, OptoReg::Bad)) { |
923 | 586 j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); |
0 | 587 } else { |
588 // Update the mapping: record new Node defined by the register | |
589 regnd.map(nreg,n); | |
590 // Update mapping for defined *value*, which is the defined | |
591 // Node after skipping all copies. | |
592 value.map(nreg,val); | |
593 } | |
923 | 594 } else if( !may_be_copy_of_callee(n) ) { |
0 | 595 assert( n->is_Copy(), "" ); |
923 | 596 j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); |
0 | 597 } |
598 } else { | |
599 // If the value occupies a register pair, record same info | |
600 // in both registers. | |
601 OptoReg::Name nreg_lo = OptoReg::add(nreg,-1); | |
602 if( RegMask::can_represent(nreg_lo) && // Either a spill slot, or | |
603 !lrgs(lidx).mask().Member(nreg_lo) ) { // Nearly always adjacent | |
604 // Sparc occasionally has non-adjacent pairs. | |
605 // Find the actual other value | |
606 RegMask tmp = lrgs(lidx).mask(); | |
607 tmp.Remove(nreg); | |
608 nreg_lo = tmp.find_first_elem(); | |
609 } | |
610 if( value[nreg] != val || value[nreg_lo] != val ) { | |
70
b683f557224b
6661247: Internal bug in 32-bit HotSpot optimizer while bit manipulations
never
parents:
0
diff
changeset
|
611 if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, nreg_lo)) { |
923 | 612 j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); |
0 | 613 } else { |
614 regnd.map(nreg , n ); | |
615 regnd.map(nreg_lo, n ); | |
616 value.map(nreg ,val); | |
617 value.map(nreg_lo,val); | |
618 } | |
923 | 619 } else if( !may_be_copy_of_callee(n) ) { |
0 | 620 assert( n->is_Copy(), "" ); |
923 | 621 j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); |
0 | 622 } |
623 } | |
624 | |
625 // Fat projections kill many registers | |
626 if( n_ideal_reg == MachProjNode::fat_proj ) { | |
627 RegMask rm = n->out_RegMask(); | |
628 // wow, what an expensive iterator... | |
629 nreg = rm.find_first_elem(); | |
630 while( OptoReg::is_valid(nreg)) { | |
631 rm.Remove(nreg); | |
632 value.map(nreg,n); | |
633 regnd.map(nreg,n); | |
634 nreg = rm.find_first_elem(); | |
635 } | |
636 } | |
637 | |
638 } // End of for all instructions in the block | |
639 | |
640 } // End for all blocks | |
641 } |