Mercurial > hg > truffle
annotate src/share/vm/opto/compile.cpp @ 1145:e018e6884bd8
6631166: CMS: better heuristics when combatting fragmentation
Summary: Autonomic per-worker free block cache sizing, tunable coalition policies, fixes to per-size block statistics, retuned gain and bandwidth of some feedback loop filters to allow quicker reactivity to abrupt changes in ambient demand, and other heuristics to reduce fragmentation of the CMS old gen. Also tightened some assertions, including those related to locking.
Reviewed-by: jmasa
author | ysr |
---|---|
date | Wed, 23 Dec 2009 09:23:54 -0800 |
parents | 7c57aead6d3e |
children | f96a1a986f7b |
rev | line source |
---|---|
0 | 1 /* |
624 | 2 * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 * | |
23 */ | |
24 | |
25 #include "incls/_precompiled.incl" | |
26 #include "incls/_compile.cpp.incl" | |
27 | |
28 /// Support for intrinsics. | |
29 | |
30 // Return the index at which m must be inserted (or already exists). | |
31 // The sort order is by the address of the ciMethod, with is_virtual as minor key. | |
32 int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) { | |
33 #ifdef ASSERT | |
34 for (int i = 1; i < _intrinsics->length(); i++) { | |
35 CallGenerator* cg1 = _intrinsics->at(i-1); | |
36 CallGenerator* cg2 = _intrinsics->at(i); | |
37 assert(cg1->method() != cg2->method() | |
38 ? cg1->method() < cg2->method() | |
39 : cg1->is_virtual() < cg2->is_virtual(), | |
40 "compiler intrinsics list must stay sorted"); | |
41 } | |
42 #endif | |
43 // Binary search sorted list, in decreasing intervals [lo, hi]. | |
44 int lo = 0, hi = _intrinsics->length()-1; | |
45 while (lo <= hi) { | |
46 int mid = (uint)(hi + lo) / 2; | |
47 ciMethod* mid_m = _intrinsics->at(mid)->method(); | |
48 if (m < mid_m) { | |
49 hi = mid-1; | |
50 } else if (m > mid_m) { | |
51 lo = mid+1; | |
52 } else { | |
53 // look at minor sort key | |
54 bool mid_virt = _intrinsics->at(mid)->is_virtual(); | |
55 if (is_virtual < mid_virt) { | |
56 hi = mid-1; | |
57 } else if (is_virtual > mid_virt) { | |
58 lo = mid+1; | |
59 } else { | |
60 return mid; // exact match | |
61 } | |
62 } | |
63 } | |
64 return lo; // inexact match | |
65 } | |
66 | |
67 void Compile::register_intrinsic(CallGenerator* cg) { | |
68 if (_intrinsics == NULL) { | |
69 _intrinsics = new GrowableArray<CallGenerator*>(60); | |
70 } | |
71 // This code is stolen from ciObjectFactory::insert. | |
72 // Really, GrowableArray should have methods for | |
73 // insert_at, remove_at, and binary_search. | |
74 int len = _intrinsics->length(); | |
75 int index = intrinsic_insertion_index(cg->method(), cg->is_virtual()); | |
76 if (index == len) { | |
77 _intrinsics->append(cg); | |
78 } else { | |
79 #ifdef ASSERT | |
80 CallGenerator* oldcg = _intrinsics->at(index); | |
81 assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice"); | |
82 #endif | |
83 _intrinsics->append(_intrinsics->at(len-1)); | |
84 int pos; | |
85 for (pos = len-2; pos >= index; pos--) { | |
86 _intrinsics->at_put(pos+1,_intrinsics->at(pos)); | |
87 } | |
88 _intrinsics->at_put(index, cg); | |
89 } | |
90 assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked"); | |
91 } | |
92 | |
93 CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) { | |
94 assert(m->is_loaded(), "don't try this on unloaded methods"); | |
95 if (_intrinsics != NULL) { | |
96 int index = intrinsic_insertion_index(m, is_virtual); | |
97 if (index < _intrinsics->length() | |
98 && _intrinsics->at(index)->method() == m | |
99 && _intrinsics->at(index)->is_virtual() == is_virtual) { | |
100 return _intrinsics->at(index); | |
101 } | |
102 } | |
103 // Lazily create intrinsics for intrinsic IDs well-known in the runtime. | |
856
75596850f863
6862576: vmIntrinsics needs cleanup in order to support JSR 292 intrinsics
jrose
parents:
647
diff
changeset
|
104 if (m->intrinsic_id() != vmIntrinsics::_none && |
75596850f863
6862576: vmIntrinsics needs cleanup in order to support JSR 292 intrinsics
jrose
parents:
647
diff
changeset
|
105 m->intrinsic_id() <= vmIntrinsics::LAST_COMPILER_INLINE) { |
0 | 106 CallGenerator* cg = make_vm_intrinsic(m, is_virtual); |
107 if (cg != NULL) { | |
108 // Save it for next time: | |
109 register_intrinsic(cg); | |
110 return cg; | |
111 } else { | |
112 gather_intrinsic_statistics(m->intrinsic_id(), is_virtual, _intrinsic_disabled); | |
113 } | |
114 } | |
115 return NULL; | |
116 } | |
117 | |
118 // Compile:: register_library_intrinsics and make_vm_intrinsic are defined | |
119 // in library_call.cpp. | |
120 | |
121 | |
122 #ifndef PRODUCT | |
123 // statistics gathering... | |
124 | |
125 juint Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0}; | |
126 jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0}; | |
127 | |
128 bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) { | |
129 assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob"); | |
130 int oflags = _intrinsic_hist_flags[id]; | |
131 assert(flags != 0, "what happened?"); | |
132 if (is_virtual) { | |
133 flags |= _intrinsic_virtual; | |
134 } | |
135 bool changed = (flags != oflags); | |
136 if ((flags & _intrinsic_worked) != 0) { | |
137 juint count = (_intrinsic_hist_count[id] += 1); | |
138 if (count == 1) { | |
139 changed = true; // first time | |
140 } | |
141 // increment the overall count also: | |
142 _intrinsic_hist_count[vmIntrinsics::_none] += 1; | |
143 } | |
144 if (changed) { | |
145 if (((oflags ^ flags) & _intrinsic_virtual) != 0) { | |
146 // Something changed about the intrinsic's virtuality. | |
147 if ((flags & _intrinsic_virtual) != 0) { | |
148 // This is the first use of this intrinsic as a virtual call. | |
149 if (oflags != 0) { | |
150 // We already saw it as a non-virtual, so note both cases. | |
151 flags |= _intrinsic_both; | |
152 } | |
153 } else if ((oflags & _intrinsic_both) == 0) { | |
154 // This is the first use of this intrinsic as a non-virtual | |
155 flags |= _intrinsic_both; | |
156 } | |
157 } | |
158 _intrinsic_hist_flags[id] = (jubyte) (oflags | flags); | |
159 } | |
160 // update the overall flags also: | |
161 _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags; | |
162 return changed; | |
163 } | |
164 | |
165 static char* format_flags(int flags, char* buf) { | |
166 buf[0] = 0; | |
167 if ((flags & Compile::_intrinsic_worked) != 0) strcat(buf, ",worked"); | |
168 if ((flags & Compile::_intrinsic_failed) != 0) strcat(buf, ",failed"); | |
169 if ((flags & Compile::_intrinsic_disabled) != 0) strcat(buf, ",disabled"); | |
170 if ((flags & Compile::_intrinsic_virtual) != 0) strcat(buf, ",virtual"); | |
171 if ((flags & Compile::_intrinsic_both) != 0) strcat(buf, ",nonvirtual"); | |
172 if (buf[0] == 0) strcat(buf, ","); | |
173 assert(buf[0] == ',', "must be"); | |
174 return &buf[1]; | |
175 } | |
176 | |
177 void Compile::print_intrinsic_statistics() { | |
178 char flagsbuf[100]; | |
179 ttyLocker ttyl; | |
180 if (xtty != NULL) xtty->head("statistics type='intrinsic'"); | |
181 tty->print_cr("Compiler intrinsic usage:"); | |
182 juint total = _intrinsic_hist_count[vmIntrinsics::_none]; | |
183 if (total == 0) total = 1; // avoid div0 in case of no successes | |
184 #define PRINT_STAT_LINE(name, c, f) \ | |
185 tty->print_cr(" %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f); | |
186 for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) { | |
187 vmIntrinsics::ID id = (vmIntrinsics::ID) index; | |
188 int flags = _intrinsic_hist_flags[id]; | |
189 juint count = _intrinsic_hist_count[id]; | |
190 if ((flags | count) != 0) { | |
191 PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf)); | |
192 } | |
193 } | |
194 PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf)); | |
195 if (xtty != NULL) xtty->tail("statistics"); | |
196 } | |
197 | |
198 void Compile::print_statistics() { | |
199 { ttyLocker ttyl; | |
200 if (xtty != NULL) xtty->head("statistics type='opto'"); | |
201 Parse::print_statistics(); | |
202 PhaseCCP::print_statistics(); | |
203 PhaseRegAlloc::print_statistics(); | |
204 Scheduling::print_statistics(); | |
205 PhasePeephole::print_statistics(); | |
206 PhaseIdealLoop::print_statistics(); | |
207 if (xtty != NULL) xtty->tail("statistics"); | |
208 } | |
209 if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) { | |
210 // put this under its own <statistics> element. | |
211 print_intrinsic_statistics(); | |
212 } | |
213 } | |
214 #endif //PRODUCT | |
215 | |
216 // Support for bundling info | |
217 Bundle* Compile::node_bundling(const Node *n) { | |
218 assert(valid_bundle_info(n), "oob"); | |
219 return &_node_bundling_base[n->_idx]; | |
220 } | |
221 | |
222 bool Compile::valid_bundle_info(const Node *n) { | |
223 return (_node_bundling_limit > n->_idx); | |
224 } | |
225 | |
226 | |
1080
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
227 void Compile::gvn_replace_by(Node* n, Node* nn) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
228 for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
229 Node* use = n->last_out(i); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
230 bool is_in_table = initial_gvn()->hash_delete(use); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
231 uint uses_found = 0; |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
232 for (uint j = 0; j < use->len(); j++) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
233 if (use->in(j) == n) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
234 if (j < use->req()) |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
235 use->set_req(j, nn); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
236 else |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
237 use->set_prec(j, nn); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
238 uses_found++; |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
239 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
240 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
241 if (is_in_table) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
242 // reinsert into table |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
243 initial_gvn()->hash_find_insert(use); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
244 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
245 record_for_igvn(use); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
246 i -= uses_found; // we deleted 1 or more copies of this edge |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
247 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
248 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
249 |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
250 |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
251 |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
252 |
0 | 253 // Identify all nodes that are reachable from below, useful. |
254 // Use breadth-first pass that records state in a Unique_Node_List, | |
255 // recursive traversal is slower. | |
256 void Compile::identify_useful_nodes(Unique_Node_List &useful) { | |
257 int estimated_worklist_size = unique(); | |
258 useful.map( estimated_worklist_size, NULL ); // preallocate space | |
259 | |
260 // Initialize worklist | |
261 if (root() != NULL) { useful.push(root()); } | |
262 // If 'top' is cached, declare it useful to preserve cached node | |
263 if( cached_top_node() ) { useful.push(cached_top_node()); } | |
264 | |
265 // Push all useful nodes onto the list, breadthfirst | |
266 for( uint next = 0; next < useful.size(); ++next ) { | |
267 assert( next < unique(), "Unique useful nodes < total nodes"); | |
268 Node *n = useful.at(next); | |
269 uint max = n->len(); | |
270 for( uint i = 0; i < max; ++i ) { | |
271 Node *m = n->in(i); | |
272 if( m == NULL ) continue; | |
273 useful.push(m); | |
274 } | |
275 } | |
276 } | |
277 | |
278 // Disconnect all useless nodes by disconnecting those at the boundary. | |
279 void Compile::remove_useless_nodes(Unique_Node_List &useful) { | |
280 uint next = 0; | |
281 while( next < useful.size() ) { | |
282 Node *n = useful.at(next++); | |
283 // Use raw traversal of out edges since this code removes out edges | |
284 int max = n->outcnt(); | |
285 for (int j = 0; j < max; ++j ) { | |
286 Node* child = n->raw_out(j); | |
287 if( ! useful.member(child) ) { | |
288 assert( !child->is_top() || child != top(), | |
289 "If top is cached in Compile object it is in useful list"); | |
290 // Only need to remove this out-edge to the useless node | |
291 n->raw_del_out(j); | |
292 --j; | |
293 --max; | |
294 } | |
295 } | |
296 if (n->outcnt() == 1 && n->has_special_unique_user()) { | |
297 record_for_igvn( n->unique_out() ); | |
298 } | |
299 } | |
300 debug_only(verify_graph_edges(true/*check for no_dead_code*/);) | |
301 } | |
302 | |
303 //------------------------------frame_size_in_words----------------------------- | |
304 // frame_slots in units of words | |
305 int Compile::frame_size_in_words() const { | |
306 // shift is 0 in LP32 and 1 in LP64 | |
307 const int shift = (LogBytesPerWord - LogBytesPerInt); | |
308 int words = _frame_slots >> shift; | |
309 assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" ); | |
310 return words; | |
311 } | |
312 | |
313 // ============================================================================ | |
314 //------------------------------CompileWrapper--------------------------------- | |
315 class CompileWrapper : public StackObj { | |
316 Compile *const _compile; | |
317 public: | |
318 CompileWrapper(Compile* compile); | |
319 | |
320 ~CompileWrapper(); | |
321 }; | |
322 | |
323 CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) { | |
324 // the Compile* pointer is stored in the current ciEnv: | |
325 ciEnv* env = compile->env(); | |
326 assert(env == ciEnv::current(), "must already be a ciEnv active"); | |
327 assert(env->compiler_data() == NULL, "compile already active?"); | |
328 env->set_compiler_data(compile); | |
329 assert(compile == Compile::current(), "sanity"); | |
330 | |
331 compile->set_type_dict(NULL); | |
332 compile->set_type_hwm(NULL); | |
333 compile->set_type_last_size(0); | |
334 compile->set_last_tf(NULL, NULL); | |
335 compile->set_indexSet_arena(NULL); | |
336 compile->set_indexSet_free_block_list(NULL); | |
337 compile->init_type_arena(); | |
338 Type::Initialize(compile); | |
339 _compile->set_scratch_buffer_blob(NULL); | |
340 _compile->begin_method(); | |
341 } | |
342 CompileWrapper::~CompileWrapper() { | |
343 _compile->end_method(); | |
344 if (_compile->scratch_buffer_blob() != NULL) | |
345 BufferBlob::free(_compile->scratch_buffer_blob()); | |
346 _compile->env()->set_compiler_data(NULL); | |
347 } | |
348 | |
349 | |
350 //----------------------------print_compile_messages--------------------------- | |
351 void Compile::print_compile_messages() { | |
352 #ifndef PRODUCT | |
353 // Check if recompiling | |
354 if (_subsume_loads == false && PrintOpto) { | |
355 // Recompiling without allowing machine instructions to subsume loads | |
356 tty->print_cr("*********************************************************"); | |
357 tty->print_cr("** Bailout: Recompile without subsuming loads **"); | |
358 tty->print_cr("*********************************************************"); | |
359 } | |
38
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
360 if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) { |
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
361 // Recompiling without escape analysis |
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
362 tty->print_cr("*********************************************************"); |
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
363 tty->print_cr("** Bailout: Recompile without escape analysis **"); |
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
364 tty->print_cr("*********************************************************"); |
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
365 } |
0 | 366 if (env()->break_at_compile()) { |
605 | 367 // Open the debugger when compiling this method. |
0 | 368 tty->print("### Breaking when compiling: "); |
369 method()->print_short_name(); | |
370 tty->cr(); | |
371 BREAKPOINT; | |
372 } | |
373 | |
374 if( PrintOpto ) { | |
375 if (is_osr_compilation()) { | |
376 tty->print("[OSR]%3d", _compile_id); | |
377 } else { | |
378 tty->print("%3d", _compile_id); | |
379 } | |
380 } | |
381 #endif | |
382 } | |
383 | |
384 | |
385 void Compile::init_scratch_buffer_blob() { | |
386 if( scratch_buffer_blob() != NULL ) return; | |
387 | |
388 // Construct a temporary CodeBuffer to have it construct a BufferBlob | |
389 // Cache this BufferBlob for this compile. | |
390 ResourceMark rm; | |
391 int size = (MAX_inst_size + MAX_stubs_size + MAX_const_size); | |
392 BufferBlob* blob = BufferBlob::create("Compile::scratch_buffer", size); | |
393 // Record the buffer blob for next time. | |
394 set_scratch_buffer_blob(blob); | |
163 | 395 // Have we run out of code space? |
396 if (scratch_buffer_blob() == NULL) { | |
397 // Let CompilerBroker disable further compilations. | |
398 record_failure("Not enough space for scratch buffer in CodeCache"); | |
399 return; | |
400 } | |
0 | 401 |
402 // Initialize the relocation buffers | |
403 relocInfo* locs_buf = (relocInfo*) blob->instructions_end() - MAX_locs_size; | |
404 set_scratch_locs_memory(locs_buf); | |
405 } | |
406 | |
407 | |
408 //-----------------------scratch_emit_size------------------------------------- | |
409 // Helper function that computes size by emitting code | |
410 uint Compile::scratch_emit_size(const Node* n) { | |
411 // Emit into a trash buffer and count bytes emitted. | |
412 // This is a pretty expensive way to compute a size, | |
413 // but it works well enough if seldom used. | |
414 // All common fixed-size instructions are given a size | |
415 // method by the AD file. | |
416 // Note that the scratch buffer blob and locs memory are | |
417 // allocated at the beginning of the compile task, and | |
418 // may be shared by several calls to scratch_emit_size. | |
419 // The allocation of the scratch buffer blob is particularly | |
420 // expensive, since it has to grab the code cache lock. | |
421 BufferBlob* blob = this->scratch_buffer_blob(); | |
422 assert(blob != NULL, "Initialize BufferBlob at start"); | |
423 assert(blob->size() > MAX_inst_size, "sanity"); | |
424 relocInfo* locs_buf = scratch_locs_memory(); | |
425 address blob_begin = blob->instructions_begin(); | |
426 address blob_end = (address)locs_buf; | |
427 assert(blob->instructions_contains(blob_end), "sanity"); | |
428 CodeBuffer buf(blob_begin, blob_end - blob_begin); | |
429 buf.initialize_consts_size(MAX_const_size); | |
430 buf.initialize_stubs_size(MAX_stubs_size); | |
431 assert(locs_buf != NULL, "sanity"); | |
432 int lsize = MAX_locs_size / 2; | |
433 buf.insts()->initialize_shared_locs(&locs_buf[0], lsize); | |
434 buf.stubs()->initialize_shared_locs(&locs_buf[lsize], lsize); | |
435 n->emit(buf, this->regalloc()); | |
436 return buf.code_size(); | |
437 } | |
438 | |
439 | |
440 // ============================================================================ | |
441 //------------------------------Compile standard------------------------------- | |
442 debug_only( int Compile::_debug_idx = 100000; ) | |
443 | |
444 // Compile a method. entry_bci is -1 for normal compilations and indicates | |
445 // the continuation bci for on stack replacement. | |
446 | |
447 | |
38
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
448 Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads, bool do_escape_analysis ) |
0 | 449 : Phase(Compiler), |
450 _env(ci_env), | |
451 _log(ci_env->log()), | |
452 _compile_id(ci_env->compile_id()), | |
453 _save_argument_registers(false), | |
454 _stub_name(NULL), | |
455 _stub_function(NULL), | |
456 _stub_entry_point(NULL), | |
457 _method(target), | |
458 _entry_bci(osr_bci), | |
459 _initial_gvn(NULL), | |
460 _for_igvn(NULL), | |
461 _warm_calls(NULL), | |
462 _subsume_loads(subsume_loads), | |
38
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
463 _do_escape_analysis(do_escape_analysis), |
0 | 464 _failure_reason(NULL), |
465 _code_buffer("Compile::Fill_buffer"), | |
466 _orig_pc_slot(0), | |
467 _orig_pc_slot_offset_in_bytes(0), | |
468 _node_bundling_limit(0), | |
469 _node_bundling_base(NULL), | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
470 _java_calls(0), |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
471 _inner_loops(0), |
0 | 472 #ifndef PRODUCT |
473 _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")), | |
474 _printer(IdealGraphPrinter::printer()), | |
475 #endif | |
476 _congraph(NULL) { | |
477 C = this; | |
478 | |
479 CompileWrapper cw(this); | |
480 #ifndef PRODUCT | |
481 if (TimeCompiler2) { | |
482 tty->print(" "); | |
483 target->holder()->name()->print(); | |
484 tty->print("."); | |
485 target->print_short_name(); | |
486 tty->print(" "); | |
487 } | |
488 TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2); | |
489 TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false); | |
100
c7c777385a15
6667042: PrintAssembly option does not work without special plugin
jrose
parents:
65
diff
changeset
|
490 bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly"); |
c7c777385a15
6667042: PrintAssembly option does not work without special plugin
jrose
parents:
65
diff
changeset
|
491 if (!print_opto_assembly) { |
c7c777385a15
6667042: PrintAssembly option does not work without special plugin
jrose
parents:
65
diff
changeset
|
492 bool print_assembly = (PrintAssembly || _method->should_print_assembly()); |
c7c777385a15
6667042: PrintAssembly option does not work without special plugin
jrose
parents:
65
diff
changeset
|
493 if (print_assembly && !Disassembler::can_decode()) { |
c7c777385a15
6667042: PrintAssembly option does not work without special plugin
jrose
parents:
65
diff
changeset
|
494 tty->print_cr("PrintAssembly request changed to PrintOptoAssembly"); |
c7c777385a15
6667042: PrintAssembly option does not work without special plugin
jrose
parents:
65
diff
changeset
|
495 print_opto_assembly = true; |
c7c777385a15
6667042: PrintAssembly option does not work without special plugin
jrose
parents:
65
diff
changeset
|
496 } |
c7c777385a15
6667042: PrintAssembly option does not work without special plugin
jrose
parents:
65
diff
changeset
|
497 } |
c7c777385a15
6667042: PrintAssembly option does not work without special plugin
jrose
parents:
65
diff
changeset
|
498 set_print_assembly(print_opto_assembly); |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
333
diff
changeset
|
499 set_parsed_irreducible_loop(false); |
0 | 500 #endif |
501 | |
502 if (ProfileTraps) { | |
503 // Make sure the method being compiled gets its own MDO, | |
504 // so we can at least track the decompile_count(). | |
505 method()->build_method_data(); | |
506 } | |
507 | |
508 Init(::AliasLevel); | |
509 | |
510 | |
511 print_compile_messages(); | |
512 | |
513 if (UseOldInlining || PrintCompilation NOT_PRODUCT( || PrintOpto) ) | |
514 _ilt = InlineTree::build_inline_tree_root(); | |
515 else | |
516 _ilt = NULL; | |
517 | |
518 // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice | |
519 assert(num_alias_types() >= AliasIdxRaw, ""); | |
520 | |
521 #define MINIMUM_NODE_HASH 1023 | |
522 // Node list that Iterative GVN will start with | |
523 Unique_Node_List for_igvn(comp_arena()); | |
524 set_for_igvn(&for_igvn); | |
525 | |
526 // GVN that will be run immediately on new nodes | |
527 uint estimated_size = method()->code_size()*4+64; | |
528 estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size); | |
529 PhaseGVN gvn(node_arena(), estimated_size); | |
530 set_initial_gvn(&gvn); | |
531 | |
532 { // Scope for timing the parser | |
533 TracePhase t3("parse", &_t_parser, true); | |
534 | |
535 // Put top into the hash table ASAP. | |
536 initial_gvn()->transform_no_reclaim(top()); | |
537 | |
538 // Set up tf(), start(), and find a CallGenerator. | |
539 CallGenerator* cg; | |
540 if (is_osr_compilation()) { | |
541 const TypeTuple *domain = StartOSRNode::osr_domain(); | |
542 const TypeTuple *range = TypeTuple::make_range(method()->signature()); | |
543 init_tf(TypeFunc::make(domain, range)); | |
544 StartNode* s = new (this, 2) StartOSRNode(root(), domain); | |
545 initial_gvn()->set_type_bottom(s); | |
546 init_start(s); | |
547 cg = CallGenerator::for_osr(method(), entry_bci()); | |
548 } else { | |
549 // Normal case. | |
550 init_tf(TypeFunc::make(method())); | |
551 StartNode* s = new (this, 2) StartNode(root(), tf()->domain()); | |
552 initial_gvn()->set_type_bottom(s); | |
553 init_start(s); | |
554 float past_uses = method()->interpreter_invocation_count(); | |
555 float expected_uses = past_uses; | |
556 cg = CallGenerator::for_inline(method(), expected_uses); | |
557 } | |
558 if (failing()) return; | |
559 if (cg == NULL) { | |
560 record_method_not_compilable_all_tiers("cannot parse method"); | |
561 return; | |
562 } | |
563 JVMState* jvms = build_start_state(start(), tf()); | |
564 if ((jvms = cg->generate(jvms)) == NULL) { | |
565 record_method_not_compilable("method parse failed"); | |
566 return; | |
567 } | |
568 GraphKit kit(jvms); | |
569 | |
570 if (!kit.stopped()) { | |
571 // Accept return values, and transfer control we know not where. | |
572 // This is done by a special, unique ReturnNode bound to root. | |
573 return_values(kit.jvms()); | |
574 } | |
575 | |
576 if (kit.has_exceptions()) { | |
577 // Any exceptions that escape from this call must be rethrown | |
578 // to whatever caller is dynamically above us on the stack. | |
579 // This is done by a special, unique RethrowNode bound to root. | |
580 rethrow_exceptions(kit.transfer_exceptions_into_jvms()); | |
581 } | |
582 | |
1080
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
583 if (!failing() && has_stringbuilder()) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
584 { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
585 // remove useless nodes to make the usage analysis simpler |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
586 ResourceMark rm; |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
587 PhaseRemoveUseless pru(initial_gvn(), &for_igvn); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
588 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
589 |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
590 { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
591 ResourceMark rm; |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
592 print_method("Before StringOpts", 3); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
593 PhaseStringOpts pso(initial_gvn(), &for_igvn); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
594 print_method("After StringOpts", 3); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
595 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
596 |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
597 // now inline anything that we skipped the first time around |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
598 while (_late_inlines.length() > 0) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
599 CallGenerator* cg = _late_inlines.pop(); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
600 cg->do_late_inline(); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
601 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
602 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
603 assert(_late_inlines.length() == 0, "should have been processed"); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
604 |
417 | 605 print_method("Before RemoveUseless", 3); |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
333
diff
changeset
|
606 |
0 | 607 // Remove clutter produced by parsing. |
608 if (!failing()) { | |
609 ResourceMark rm; | |
610 PhaseRemoveUseless pru(initial_gvn(), &for_igvn); | |
611 } | |
612 } | |
613 | |
614 // Note: Large methods are capped off in do_one_bytecode(). | |
615 if (failing()) return; | |
616 | |
617 // After parsing, node notes are no longer automagic. | |
618 // They must be propagated by register_new_node_with_optimizer(), | |
619 // clone(), or the like. | |
620 set_default_node_notes(NULL); | |
621 | |
622 for (;;) { | |
623 int successes = Inline_Warm(); | |
624 if (failing()) return; | |
625 if (successes == 0) break; | |
626 } | |
627 | |
628 // Drain the list. | |
629 Finish_Warm(); | |
630 #ifndef PRODUCT | |
631 if (_printer) { | |
632 _printer->print_inlining(this); | |
633 } | |
634 #endif | |
635 | |
636 if (failing()) return; | |
637 NOT_PRODUCT( verify_graph_edges(); ) | |
638 | |
639 // Perform escape analysis | |
244
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
640 if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) { |
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
641 TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, true); |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
642 // Add ConP#NULL and ConN#NULL nodes before ConnectionGraph construction. |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
643 PhaseGVN* igvn = initial_gvn(); |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
644 Node* oop_null = igvn->zerocon(T_OBJECT); |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
645 Node* noop_null = igvn->zerocon(T_NARROWOOP); |
244
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
646 |
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
647 _congraph = new(comp_arena()) ConnectionGraph(this); |
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
648 bool has_non_escaping_obj = _congraph->compute_escape(); |
38
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
649 |
0 | 650 #ifndef PRODUCT |
651 if (PrintEscapeAnalysis) { | |
652 _congraph->dump(); | |
653 } | |
654 #endif | |
253
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
655 // Cleanup. |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
656 if (oop_null->outcnt() == 0) |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
657 igvn->hash_delete(oop_null); |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
658 if (noop_null->outcnt() == 0) |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
659 igvn->hash_delete(noop_null); |
b0fe4deeb9fb
6726999: nsk/stress/jck12a/jck12a010 assert(n != null,"Bad immediate dominator info.")
kvn
parents:
247
diff
changeset
|
660 |
244
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
661 if (!has_non_escaping_obj) { |
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
662 _congraph = NULL; |
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
663 } |
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
664 |
524eca34ea76
6684714: Optimize EA Connection Graph build performance
kvn
parents:
223
diff
changeset
|
665 if (failing()) return; |
0 | 666 } |
667 // Now optimize | |
668 Optimize(); | |
669 if (failing()) return; | |
670 NOT_PRODUCT( verify_graph_edges(); ) | |
671 | |
672 #ifndef PRODUCT | |
673 if (PrintIdeal) { | |
674 ttyLocker ttyl; // keep the following output all in one block | |
675 // This output goes directly to the tty, not the compiler log. | |
676 // To enable tools to match it up with the compilation activity, | |
677 // be sure to tag this tty output with the compile ID. | |
678 if (xtty != NULL) { | |
679 xtty->head("ideal compile_id='%d'%s", compile_id(), | |
680 is_osr_compilation() ? " compile_kind='osr'" : | |
681 ""); | |
682 } | |
683 root()->dump(9999); | |
684 if (xtty != NULL) { | |
685 xtty->tail("ideal"); | |
686 } | |
687 } | |
688 #endif | |
689 | |
690 // Now that we know the size of all the monitors we can add a fixed slot | |
691 // for the original deopt pc. | |
692 | |
693 _orig_pc_slot = fixed_slots(); | |
694 int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size); | |
695 set_fixed_slots(next_slot); | |
696 | |
697 // Now generate code | |
698 Code_Gen(); | |
699 if (failing()) return; | |
700 | |
701 // Check if we want to skip execution of all compiled code. | |
702 { | |
703 #ifndef PRODUCT | |
704 if (OptoNoExecute) { | |
705 record_method_not_compilable("+OptoNoExecute"); // Flag as failed | |
706 return; | |
707 } | |
708 TracePhase t2("install_code", &_t_registerMethod, TimeCompiler); | |
709 #endif | |
710 | |
711 if (is_osr_compilation()) { | |
712 _code_offsets.set_value(CodeOffsets::Verified_Entry, 0); | |
713 _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size); | |
714 } else { | |
715 _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size); | |
716 _code_offsets.set_value(CodeOffsets::OSR_Entry, 0); | |
717 } | |
718 | |
719 env()->register_method(_method, _entry_bci, | |
720 &_code_offsets, | |
721 _orig_pc_slot_offset_in_bytes, | |
722 code_buffer(), | |
723 frame_size_in_words(), _oop_map_set, | |
724 &_handler_table, &_inc_table, | |
725 compiler, | |
726 env()->comp_level(), | |
727 true, /*has_debug_info*/ | |
728 has_unsafe_access() | |
729 ); | |
730 } | |
731 } | |
732 | |
733 //------------------------------Compile---------------------------------------- | |
734 // Compile a runtime stub | |
735 Compile::Compile( ciEnv* ci_env, | |
736 TypeFunc_generator generator, | |
737 address stub_function, | |
738 const char *stub_name, | |
739 int is_fancy_jump, | |
740 bool pass_tls, | |
741 bool save_arg_registers, | |
742 bool return_pc ) | |
743 : Phase(Compiler), | |
744 _env(ci_env), | |
745 _log(ci_env->log()), | |
746 _compile_id(-1), | |
747 _save_argument_registers(save_arg_registers), | |
748 _method(NULL), | |
749 _stub_name(stub_name), | |
750 _stub_function(stub_function), | |
751 _stub_entry_point(NULL), | |
752 _entry_bci(InvocationEntryBci), | |
753 _initial_gvn(NULL), | |
754 _for_igvn(NULL), | |
755 _warm_calls(NULL), | |
756 _orig_pc_slot(0), | |
757 _orig_pc_slot_offset_in_bytes(0), | |
758 _subsume_loads(true), | |
38
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
759 _do_escape_analysis(false), |
0 | 760 _failure_reason(NULL), |
761 _code_buffer("Compile::Fill_buffer"), | |
762 _node_bundling_limit(0), | |
763 _node_bundling_base(NULL), | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
764 _java_calls(0), |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
765 _inner_loops(0), |
0 | 766 #ifndef PRODUCT |
767 _trace_opto_output(TraceOptoOutput), | |
768 _printer(NULL), | |
769 #endif | |
770 _congraph(NULL) { | |
771 C = this; | |
772 | |
773 #ifndef PRODUCT | |
774 TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false); | |
775 TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false); | |
776 set_print_assembly(PrintFrameConverterAssembly); | |
367
194b8e3a2fc4
6384206: Phis which are later unneeded are impairing our ability to inline based on static types
never
parents:
333
diff
changeset
|
777 set_parsed_irreducible_loop(false); |
0 | 778 #endif |
779 CompileWrapper cw(this); | |
780 Init(/*AliasLevel=*/ 0); | |
781 init_tf((*generator)()); | |
782 | |
783 { | |
784 // The following is a dummy for the sake of GraphKit::gen_stub | |
785 Unique_Node_List for_igvn(comp_arena()); | |
786 set_for_igvn(&for_igvn); // not used, but some GraphKit guys push on this | |
787 PhaseGVN gvn(Thread::current()->resource_area(),255); | |
788 set_initial_gvn(&gvn); // not significant, but GraphKit guys use it pervasively | |
789 gvn.transform_no_reclaim(top()); | |
790 | |
791 GraphKit kit; | |
792 kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc); | |
793 } | |
794 | |
795 NOT_PRODUCT( verify_graph_edges(); ) | |
796 Code_Gen(); | |
797 if (failing()) return; | |
798 | |
799 | |
800 // Entry point will be accessed using compile->stub_entry_point(); | |
801 if (code_buffer() == NULL) { | |
802 Matcher::soft_match_failure(); | |
803 } else { | |
804 if (PrintAssembly && (WizardMode || Verbose)) | |
805 tty->print_cr("### Stub::%s", stub_name); | |
806 | |
807 if (!failing()) { | |
808 assert(_fixed_slots == 0, "no fixed slots used for runtime stubs"); | |
809 | |
810 // Make the NMethod | |
811 // For now we mark the frame as never safe for profile stackwalking | |
812 RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name, | |
813 code_buffer(), | |
814 CodeOffsets::frame_never_safe, | |
815 // _code_offsets.value(CodeOffsets::Frame_Complete), | |
816 frame_size_in_words(), | |
817 _oop_map_set, | |
818 save_arg_registers); | |
819 assert(rs != NULL && rs->is_runtime_stub(), "sanity check"); | |
820 | |
821 _stub_entry_point = rs->entry_point(); | |
822 } | |
823 } | |
824 } | |
825 | |
826 #ifndef PRODUCT | |
827 void print_opto_verbose_signature( const TypeFunc *j_sig, const char *stub_name ) { | |
828 if(PrintOpto && Verbose) { | |
829 tty->print("%s ", stub_name); j_sig->print_flattened(); tty->cr(); | |
830 } | |
831 } | |
832 #endif | |
833 | |
834 void Compile::print_codes() { | |
835 } | |
836 | |
837 //------------------------------Init------------------------------------------- | |
838 // Prepare for a single compilation | |
839 void Compile::Init(int aliaslevel) { | |
840 _unique = 0; | |
841 _regalloc = NULL; | |
842 | |
843 _tf = NULL; // filled in later | |
844 _top = NULL; // cached later | |
845 _matcher = NULL; // filled in later | |
846 _cfg = NULL; // filled in later | |
847 | |
848 set_24_bit_selection_and_mode(Use24BitFP, false); | |
849 | |
850 _node_note_array = NULL; | |
851 _default_node_notes = NULL; | |
852 | |
853 _immutable_memory = NULL; // filled in at first inquiry | |
854 | |
855 // Globally visible Nodes | |
856 // First set TOP to NULL to give safe behavior during creation of RootNode | |
857 set_cached_top_node(NULL); | |
858 set_root(new (this, 3) RootNode()); | |
859 // Now that you have a Root to point to, create the real TOP | |
860 set_cached_top_node( new (this, 1) ConNode(Type::TOP) ); | |
861 set_recent_alloc(NULL, NULL); | |
862 | |
863 // Create Debug Information Recorder to record scopes, oopmaps, etc. | |
864 env()->set_oop_recorder(new OopRecorder(comp_arena())); | |
865 env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder())); | |
866 env()->set_dependencies(new Dependencies(env())); | |
867 | |
868 _fixed_slots = 0; | |
869 set_has_split_ifs(false); | |
870 set_has_loops(has_method() && method()->has_loops()); // first approximation | |
1080
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
871 set_has_stringbuilder(false); |
0 | 872 _deopt_happens = true; // start out assuming the worst |
873 _trap_can_recompile = false; // no traps emitted yet | |
874 _major_progress = true; // start out assuming good things will happen | |
875 set_has_unsafe_access(false); | |
876 Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist)); | |
877 set_decompile_count(0); | |
878 | |
418 | 879 set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency")); |
0 | 880 // Compilation level related initialization |
881 if (env()->comp_level() == CompLevel_fast_compile) { | |
882 set_num_loop_opts(Tier1LoopOptsCount); | |
883 set_do_inlining(Tier1Inline != 0); | |
884 set_max_inline_size(Tier1MaxInlineSize); | |
885 set_freq_inline_size(Tier1FreqInlineSize); | |
886 set_do_scheduling(false); | |
887 set_do_count_invocations(Tier1CountInvocations); | |
888 set_do_method_data_update(Tier1UpdateMethodData); | |
889 } else { | |
890 assert(env()->comp_level() == CompLevel_full_optimization, "unknown comp level"); | |
891 set_num_loop_opts(LoopOptsCount); | |
892 set_do_inlining(Inline); | |
893 set_max_inline_size(MaxInlineSize); | |
894 set_freq_inline_size(FreqInlineSize); | |
895 set_do_scheduling(OptoScheduling); | |
896 set_do_count_invocations(false); | |
897 set_do_method_data_update(false); | |
898 } | |
899 | |
900 if (debug_info()->recording_non_safepoints()) { | |
901 set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*> | |
902 (comp_arena(), 8, 0, NULL)); | |
903 set_default_node_notes(Node_Notes::make(this)); | |
904 } | |
905 | |
906 // // -- Initialize types before each compile -- | |
907 // // Update cached type information | |
908 // if( _method && _method->constants() ) | |
909 // Type::update_loaded_types(_method, _method->constants()); | |
910 | |
911 // Init alias_type map. | |
38
b789bcaf2dd9
6667610: (Escape Analysis) retry compilation without EA if it fails
kvn
parents:
0
diff
changeset
|
912 if (!_do_escape_analysis && aliaslevel == 3) |
0 | 913 aliaslevel = 2; // No unique types without escape analysis |
914 _AliasLevel = aliaslevel; | |
915 const int grow_ats = 16; | |
916 _max_alias_types = grow_ats; | |
917 _alias_types = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats); | |
918 AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, grow_ats); | |
919 Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats); | |
920 { | |
921 for (int i = 0; i < grow_ats; i++) _alias_types[i] = &ats[i]; | |
922 } | |
923 // Initialize the first few types. | |
924 _alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL); | |
925 _alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM); | |
926 _alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM); | |
927 _num_alias_types = AliasIdxRaw+1; | |
928 // Zero out the alias type cache. | |
929 Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache)); | |
930 // A NULL adr_type hits in the cache right away. Preload the right answer. | |
931 probe_alias_cache(NULL)->_index = AliasIdxTop; | |
932 | |
933 _intrinsics = NULL; | |
934 _macro_nodes = new GrowableArray<Node*>(comp_arena(), 8, 0, NULL); | |
935 register_library_intrinsics(); | |
936 } | |
937 | |
938 //---------------------------init_start---------------------------------------- | |
939 // Install the StartNode on this compile object. | |
940 void Compile::init_start(StartNode* s) { | |
941 if (failing()) | |
942 return; // already failing | |
943 assert(s == start(), ""); | |
944 } | |
945 | |
946 StartNode* Compile::start() const { | |
947 assert(!failing(), ""); | |
948 for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) { | |
949 Node* start = root()->fast_out(i); | |
950 if( start->is_Start() ) | |
951 return start->as_Start(); | |
952 } | |
953 ShouldNotReachHere(); | |
954 return NULL; | |
955 } | |
956 | |
957 //-------------------------------immutable_memory------------------------------------- | |
958 // Access immutable memory | |
959 Node* Compile::immutable_memory() { | |
960 if (_immutable_memory != NULL) { | |
961 return _immutable_memory; | |
962 } | |
963 StartNode* s = start(); | |
964 for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) { | |
965 Node *p = s->fast_out(i); | |
966 if (p != s && p->as_Proj()->_con == TypeFunc::Memory) { | |
967 _immutable_memory = p; | |
968 return _immutable_memory; | |
969 } | |
970 } | |
971 ShouldNotReachHere(); | |
972 return NULL; | |
973 } | |
974 | |
975 //----------------------set_cached_top_node------------------------------------ | |
976 // Install the cached top node, and make sure Node::is_top works correctly. | |
977 void Compile::set_cached_top_node(Node* tn) { | |
978 if (tn != NULL) verify_top(tn); | |
979 Node* old_top = _top; | |
980 _top = tn; | |
981 // Calling Node::setup_is_top allows the nodes the chance to adjust | |
982 // their _out arrays. | |
983 if (_top != NULL) _top->setup_is_top(); | |
984 if (old_top != NULL) old_top->setup_is_top(); | |
985 assert(_top == NULL || top()->is_top(), ""); | |
986 } | |
987 | |
988 #ifndef PRODUCT | |
989 void Compile::verify_top(Node* tn) const { | |
990 if (tn != NULL) { | |
991 assert(tn->is_Con(), "top node must be a constant"); | |
992 assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type"); | |
993 assert(tn->in(0) != NULL, "must have live top node"); | |
994 } | |
995 } | |
996 #endif | |
997 | |
998 | |
999 ///-------------------Managing Per-Node Debug & Profile Info------------------- | |
1000 | |
1001 void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) { | |
1002 guarantee(arr != NULL, ""); | |
1003 int num_blocks = arr->length(); | |
1004 if (grow_by < num_blocks) grow_by = num_blocks; | |
1005 int num_notes = grow_by * _node_notes_block_size; | |
1006 Node_Notes* notes = NEW_ARENA_ARRAY(node_arena(), Node_Notes, num_notes); | |
1007 Copy::zero_to_bytes(notes, num_notes * sizeof(Node_Notes)); | |
1008 while (num_notes > 0) { | |
1009 arr->append(notes); | |
1010 notes += _node_notes_block_size; | |
1011 num_notes -= _node_notes_block_size; | |
1012 } | |
1013 assert(num_notes == 0, "exact multiple, please"); | |
1014 } | |
1015 | |
1016 bool Compile::copy_node_notes_to(Node* dest, Node* source) { | |
1017 if (source == NULL || dest == NULL) return false; | |
1018 | |
1019 if (dest->is_Con()) | |
1020 return false; // Do not push debug info onto constants. | |
1021 | |
1022 #ifdef ASSERT | |
1023 // Leave a bread crumb trail pointing to the original node: | |
1024 if (dest != NULL && dest != source && dest->debug_orig() == NULL) { | |
1025 dest->set_debug_orig(source); | |
1026 } | |
1027 #endif | |
1028 | |
1029 if (node_note_array() == NULL) | |
1030 return false; // Not collecting any notes now. | |
1031 | |
1032 // This is a copy onto a pre-existing node, which may already have notes. | |
1033 // If both nodes have notes, do not overwrite any pre-existing notes. | |
1034 Node_Notes* source_notes = node_notes_at(source->_idx); | |
1035 if (source_notes == NULL || source_notes->is_clear()) return false; | |
1036 Node_Notes* dest_notes = node_notes_at(dest->_idx); | |
1037 if (dest_notes == NULL || dest_notes->is_clear()) { | |
1038 return set_node_notes_at(dest->_idx, source_notes); | |
1039 } | |
1040 | |
1041 Node_Notes merged_notes = (*source_notes); | |
1042 // The order of operations here ensures that dest notes will win... | |
1043 merged_notes.update_from(dest_notes); | |
1044 return set_node_notes_at(dest->_idx, &merged_notes); | |
1045 } | |
1046 | |
1047 | |
1048 //--------------------------allow_range_check_smearing------------------------- | |
1049 // Gating condition for coalescing similar range checks. | |
1050 // Sometimes we try 'speculatively' replacing a series of a range checks by a | |
1051 // single covering check that is at least as strong as any of them. | |
1052 // If the optimization succeeds, the simplified (strengthened) range check | |
1053 // will always succeed. If it fails, we will deopt, and then give up | |
1054 // on the optimization. | |
1055 bool Compile::allow_range_check_smearing() const { | |
1056 // If this method has already thrown a range-check, | |
1057 // assume it was because we already tried range smearing | |
1058 // and it failed. | |
1059 uint already_trapped = trap_count(Deoptimization::Reason_range_check); | |
1060 return !already_trapped; | |
1061 } | |
1062 | |
1063 | |
1064 //------------------------------flatten_alias_type----------------------------- | |
1065 const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const { | |
1066 int offset = tj->offset(); | |
1067 TypePtr::PTR ptr = tj->ptr(); | |
1068 | |
247 | 1069 // Known instance (scalarizable allocation) alias only with itself. |
1070 bool is_known_inst = tj->isa_oopptr() != NULL && | |
1071 tj->is_oopptr()->is_known_instance(); | |
1072 | |
0 | 1073 // Process weird unsafe references. |
1074 if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) { | |
1075 assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops"); | |
247 | 1076 assert(!is_known_inst, "scalarizable allocation should not have unsafe references"); |
0 | 1077 tj = TypeOopPtr::BOTTOM; |
1078 ptr = tj->ptr(); | |
1079 offset = tj->offset(); | |
1080 } | |
1081 | |
1082 // Array pointers need some flattening | |
1083 const TypeAryPtr *ta = tj->isa_aryptr(); | |
247 | 1084 if( ta && is_known_inst ) { |
1085 if ( offset != Type::OffsetBot && | |
1086 offset > arrayOopDesc::length_offset_in_bytes() ) { | |
1087 offset = Type::OffsetBot; // Flatten constant access into array body only | |
1088 tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id()); | |
1089 } | |
1090 } else if( ta && _AliasLevel >= 2 ) { | |
0 | 1091 // For arrays indexed by constant indices, we flatten the alias |
1092 // space to include all of the array body. Only the header, klass | |
1093 // and array length can be accessed un-aliased. | |
1094 if( offset != Type::OffsetBot ) { | |
1095 if( ta->const_oop() ) { // methodDataOop or methodOop | |
1096 offset = Type::OffsetBot; // Flatten constant access into array body | |
247 | 1097 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset); |
0 | 1098 } else if( offset == arrayOopDesc::length_offset_in_bytes() ) { |
1099 // range is OK as-is. | |
1100 tj = ta = TypeAryPtr::RANGE; | |
1101 } else if( offset == oopDesc::klass_offset_in_bytes() ) { | |
1102 tj = TypeInstPtr::KLASS; // all klass loads look alike | |
1103 ta = TypeAryPtr::RANGE; // generic ignored junk | |
1104 ptr = TypePtr::BotPTR; | |
1105 } else if( offset == oopDesc::mark_offset_in_bytes() ) { | |
1106 tj = TypeInstPtr::MARK; | |
1107 ta = TypeAryPtr::RANGE; // generic ignored junk | |
1108 ptr = TypePtr::BotPTR; | |
1109 } else { // Random constant offset into array body | |
1110 offset = Type::OffsetBot; // Flatten constant access into array body | |
247 | 1111 tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset); |
0 | 1112 } |
1113 } | |
1114 // Arrays of fixed size alias with arrays of unknown size. | |
1115 if (ta->size() != TypeInt::POS) { | |
1116 const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS); | |
247 | 1117 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset); |
0 | 1118 } |
1119 // Arrays of known objects become arrays of unknown objects. | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1120 if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1121 const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size()); |
247 | 1122 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset); |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1123 } |
0 | 1124 if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) { |
1125 const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size()); | |
247 | 1126 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset); |
0 | 1127 } |
1128 // Arrays of bytes and of booleans both use 'bastore' and 'baload' so | |
1129 // cannot be distinguished by bytecode alone. | |
1130 if (ta->elem() == TypeInt::BOOL) { | |
1131 const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size()); | |
1132 ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE); | |
247 | 1133 tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset); |
0 | 1134 } |
1135 // During the 2nd round of IterGVN, NotNull castings are removed. | |
1136 // Make sure the Bottom and NotNull variants alias the same. | |
1137 // Also, make sure exact and non-exact variants alias the same. | |
1138 if( ptr == TypePtr::NotNull || ta->klass_is_exact() ) { | |
1139 if (ta->const_oop()) { | |
1140 tj = ta = TypeAryPtr::make(TypePtr::Constant,ta->const_oop(),ta->ary(),ta->klass(),false,offset); | |
1141 } else { | |
1142 tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset); | |
1143 } | |
1144 } | |
1145 } | |
1146 | |
1147 // Oop pointers need some flattening | |
1148 const TypeInstPtr *to = tj->isa_instptr(); | |
1149 if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) { | |
1150 if( ptr == TypePtr::Constant ) { | |
1151 // No constant oop pointers (such as Strings); they alias with | |
1152 // unknown strings. | |
247 | 1153 assert(!is_known_inst, "not scalarizable allocation"); |
0 | 1154 tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset); |
247 | 1155 } else if( is_known_inst ) { |
163 | 1156 tj = to; // Keep NotNull and klass_is_exact for instance type |
0 | 1157 } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) { |
1158 // During the 2nd round of IterGVN, NotNull castings are removed. | |
1159 // Make sure the Bottom and NotNull variants alias the same. | |
1160 // Also, make sure exact and non-exact variants alias the same. | |
247 | 1161 tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset); |
0 | 1162 } |
1163 // Canonicalize the holder of this field | |
1164 ciInstanceKlass *k = to->klass()->as_instance_klass(); | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1165 if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) { |
0 | 1166 // First handle header references such as a LoadKlassNode, even if the |
1167 // object's klass is unloaded at compile time (4965979). | |
247 | 1168 if (!is_known_inst) { // Do it only for non-instance types |
1169 tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset); | |
1170 } | |
0 | 1171 } else if (offset < 0 || offset >= k->size_helper() * wordSize) { |
1172 to = NULL; | |
1173 tj = TypeOopPtr::BOTTOM; | |
1174 offset = tj->offset(); | |
1175 } else { | |
1176 ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset); | |
1177 if (!k->equals(canonical_holder) || tj->offset() != offset) { | |
247 | 1178 if( is_known_inst ) { |
1179 tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id()); | |
1180 } else { | |
1181 tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset); | |
1182 } | |
0 | 1183 } |
1184 } | |
1185 } | |
1186 | |
1187 // Klass pointers to object array klasses need some flattening | |
1188 const TypeKlassPtr *tk = tj->isa_klassptr(); | |
1189 if( tk ) { | |
1190 // If we are referencing a field within a Klass, we need | |
1191 // to assume the worst case of an Object. Both exact and | |
1192 // inexact types must flatten to the same alias class. | |
1193 // Since the flattened result for a klass is defined to be | |
1194 // precisely java.lang.Object, use a constant ptr. | |
1195 if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) { | |
1196 | |
1197 tj = tk = TypeKlassPtr::make(TypePtr::Constant, | |
1198 TypeKlassPtr::OBJECT->klass(), | |
1199 offset); | |
1200 } | |
1201 | |
1202 ciKlass* klass = tk->klass(); | |
1203 if( klass->is_obj_array_klass() ) { | |
1204 ciKlass* k = TypeAryPtr::OOPS->klass(); | |
1205 if( !k || !k->is_loaded() ) // Only fails for some -Xcomp runs | |
1206 k = TypeInstPtr::BOTTOM->klass(); | |
1207 tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset ); | |
1208 } | |
1209 | |
1210 // Check for precise loads from the primary supertype array and force them | |
1211 // to the supertype cache alias index. Check for generic array loads from | |
1212 // the primary supertype array and also force them to the supertype cache | |
1213 // alias index. Since the same load can reach both, we need to merge | |
1214 // these 2 disparate memories into the same alias class. Since the | |
1215 // primary supertype array is read-only, there's no chance of confusion | |
1216 // where we bypass an array load and an array store. | |
1217 uint off2 = offset - Klass::primary_supers_offset_in_bytes(); | |
1218 if( offset == Type::OffsetBot || | |
1219 off2 < Klass::primary_super_limit()*wordSize ) { | |
1220 offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes(); | |
1221 tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset ); | |
1222 } | |
1223 } | |
1224 | |
1225 // Flatten all Raw pointers together. | |
1226 if (tj->base() == Type::RawPtr) | |
1227 tj = TypeRawPtr::BOTTOM; | |
1228 | |
1229 if (tj->base() == Type::AnyPtr) | |
1230 tj = TypePtr::BOTTOM; // An error, which the caller must check for. | |
1231 | |
1232 // Flatten all to bottom for now | |
1233 switch( _AliasLevel ) { | |
1234 case 0: | |
1235 tj = TypePtr::BOTTOM; | |
1236 break; | |
1237 case 1: // Flatten to: oop, static, field or array | |
1238 switch (tj->base()) { | |
1239 //case Type::AryPtr: tj = TypeAryPtr::RANGE; break; | |
1240 case Type::RawPtr: tj = TypeRawPtr::BOTTOM; break; | |
1241 case Type::AryPtr: // do not distinguish arrays at all | |
1242 case Type::InstPtr: tj = TypeInstPtr::BOTTOM; break; | |
1243 case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break; | |
1244 case Type::AnyPtr: tj = TypePtr::BOTTOM; break; // caller checks it | |
1245 default: ShouldNotReachHere(); | |
1246 } | |
1247 break; | |
605 | 1248 case 2: // No collapsing at level 2; keep all splits |
1249 case 3: // No collapsing at level 3; keep all splits | |
0 | 1250 break; |
1251 default: | |
1252 Unimplemented(); | |
1253 } | |
1254 | |
1255 offset = tj->offset(); | |
1256 assert( offset != Type::OffsetTop, "Offset has fallen from constant" ); | |
1257 | |
1258 assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) || | |
1259 (offset == Type::OffsetBot && tj->base() == Type::AryPtr) || | |
1260 (offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) || | |
1261 (offset == Type::OffsetBot && tj == TypePtr::BOTTOM) || | |
1262 (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) || | |
1263 (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) || | |
1264 (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr) , | |
1265 "For oops, klasses, raw offset must be constant; for arrays the offset is never known" ); | |
1266 assert( tj->ptr() != TypePtr::TopPTR && | |
1267 tj->ptr() != TypePtr::AnyNull && | |
1268 tj->ptr() != TypePtr::Null, "No imprecise addresses" ); | |
1269 // assert( tj->ptr() != TypePtr::Constant || | |
1270 // tj->base() == Type::RawPtr || | |
1271 // tj->base() == Type::KlassPtr, "No constant oop addresses" ); | |
1272 | |
1273 return tj; | |
1274 } | |
1275 | |
1276 void Compile::AliasType::Init(int i, const TypePtr* at) { | |
1277 _index = i; | |
1278 _adr_type = at; | |
1279 _field = NULL; | |
1280 _is_rewritable = true; // default | |
1281 const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL; | |
223 | 1282 if (atoop != NULL && atoop->is_known_instance()) { |
1283 const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot); | |
0 | 1284 _general_index = Compile::current()->get_alias_index(gt); |
1285 } else { | |
1286 _general_index = 0; | |
1287 } | |
1288 } | |
1289 | |
1290 //---------------------------------print_on------------------------------------ | |
1291 #ifndef PRODUCT | |
1292 void Compile::AliasType::print_on(outputStream* st) { | |
1293 if (index() < 10) | |
1294 st->print("@ <%d> ", index()); | |
1295 else st->print("@ <%d>", index()); | |
1296 st->print(is_rewritable() ? " " : " RO"); | |
1297 int offset = adr_type()->offset(); | |
1298 if (offset == Type::OffsetBot) | |
1299 st->print(" +any"); | |
1300 else st->print(" +%-3d", offset); | |
1301 st->print(" in "); | |
1302 adr_type()->dump_on(st); | |
1303 const TypeOopPtr* tjp = adr_type()->isa_oopptr(); | |
1304 if (field() != NULL && tjp) { | |
1305 if (tjp->klass() != field()->holder() || | |
1306 tjp->offset() != field()->offset_in_bytes()) { | |
1307 st->print(" != "); | |
1308 field()->print(); | |
1309 st->print(" ***"); | |
1310 } | |
1311 } | |
1312 } | |
1313 | |
1314 void print_alias_types() { | |
1315 Compile* C = Compile::current(); | |
1316 tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1); | |
1317 for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) { | |
1318 C->alias_type(idx)->print_on(tty); | |
1319 tty->cr(); | |
1320 } | |
1321 } | |
1322 #endif | |
1323 | |
1324 | |
1325 //----------------------------probe_alias_cache-------------------------------- | |
1326 Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) { | |
1327 intptr_t key = (intptr_t) adr_type; | |
1328 key ^= key >> logAliasCacheSize; | |
1329 return &_alias_cache[key & right_n_bits(logAliasCacheSize)]; | |
1330 } | |
1331 | |
1332 | |
1333 //-----------------------------grow_alias_types-------------------------------- | |
1334 void Compile::grow_alias_types() { | |
1335 const int old_ats = _max_alias_types; // how many before? | |
1336 const int new_ats = old_ats; // how many more? | |
1337 const int grow_ats = old_ats+new_ats; // how many now? | |
1338 _max_alias_types = grow_ats; | |
1339 _alias_types = REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats); | |
1340 AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats); | |
1341 Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats); | |
1342 for (int i = 0; i < new_ats; i++) _alias_types[old_ats+i] = &ats[i]; | |
1343 } | |
1344 | |
1345 | |
1346 //--------------------------------find_alias_type------------------------------ | |
1347 Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create) { | |
1348 if (_AliasLevel == 0) | |
1349 return alias_type(AliasIdxBot); | |
1350 | |
1351 AliasCacheEntry* ace = probe_alias_cache(adr_type); | |
1352 if (ace->_adr_type == adr_type) { | |
1353 return alias_type(ace->_index); | |
1354 } | |
1355 | |
1356 // Handle special cases. | |
1357 if (adr_type == NULL) return alias_type(AliasIdxTop); | |
1358 if (adr_type == TypePtr::BOTTOM) return alias_type(AliasIdxBot); | |
1359 | |
1360 // Do it the slow way. | |
1361 const TypePtr* flat = flatten_alias_type(adr_type); | |
1362 | |
1363 #ifdef ASSERT | |
1364 assert(flat == flatten_alias_type(flat), "idempotent"); | |
1365 assert(flat != TypePtr::BOTTOM, "cannot alias-analyze an untyped ptr"); | |
1366 if (flat->isa_oopptr() && !flat->isa_klassptr()) { | |
1367 const TypeOopPtr* foop = flat->is_oopptr(); | |
247 | 1368 // Scalarizable allocations have exact klass always. |
1369 bool exact = !foop->klass_is_exact() || foop->is_known_instance(); | |
1370 const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr(); | |
0 | 1371 assert(foop == flatten_alias_type(xoop), "exactness must not affect alias type"); |
1372 } | |
1373 assert(flat == flatten_alias_type(flat), "exact bit doesn't matter"); | |
1374 #endif | |
1375 | |
1376 int idx = AliasIdxTop; | |
1377 for (int i = 0; i < num_alias_types(); i++) { | |
1378 if (alias_type(i)->adr_type() == flat) { | |
1379 idx = i; | |
1380 break; | |
1381 } | |
1382 } | |
1383 | |
1384 if (idx == AliasIdxTop) { | |
1385 if (no_create) return NULL; | |
1386 // Grow the array if necessary. | |
1387 if (_num_alias_types == _max_alias_types) grow_alias_types(); | |
1388 // Add a new alias type. | |
1389 idx = _num_alias_types++; | |
1390 _alias_types[idx]->Init(idx, flat); | |
1391 if (flat == TypeInstPtr::KLASS) alias_type(idx)->set_rewritable(false); | |
1392 if (flat == TypeAryPtr::RANGE) alias_type(idx)->set_rewritable(false); | |
1393 if (flat->isa_instptr()) { | |
1394 if (flat->offset() == java_lang_Class::klass_offset_in_bytes() | |
1395 && flat->is_instptr()->klass() == env()->Class_klass()) | |
1396 alias_type(idx)->set_rewritable(false); | |
1397 } | |
1398 if (flat->isa_klassptr()) { | |
1399 if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) | |
1400 alias_type(idx)->set_rewritable(false); | |
1401 if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) | |
1402 alias_type(idx)->set_rewritable(false); | |
1403 if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) | |
1404 alias_type(idx)->set_rewritable(false); | |
1405 if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) | |
1406 alias_type(idx)->set_rewritable(false); | |
1407 } | |
1408 // %%% (We would like to finalize JavaThread::threadObj_offset(), | |
1409 // but the base pointer type is not distinctive enough to identify | |
1410 // references into JavaThread.) | |
1411 | |
1412 // Check for final instance fields. | |
1413 const TypeInstPtr* tinst = flat->isa_instptr(); | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1414 if (tinst && tinst->offset() >= instanceOopDesc::base_offset_in_bytes()) { |
0 | 1415 ciInstanceKlass *k = tinst->klass()->as_instance_klass(); |
1416 ciField* field = k->get_field_by_offset(tinst->offset(), false); | |
1417 // Set field() and is_rewritable() attributes. | |
1418 if (field != NULL) alias_type(idx)->set_field(field); | |
1419 } | |
1420 const TypeKlassPtr* tklass = flat->isa_klassptr(); | |
1421 // Check for final static fields. | |
1422 if (tklass && tklass->klass()->is_instance_klass()) { | |
1423 ciInstanceKlass *k = tklass->klass()->as_instance_klass(); | |
1424 ciField* field = k->get_field_by_offset(tklass->offset(), true); | |
1425 // Set field() and is_rewritable() attributes. | |
1426 if (field != NULL) alias_type(idx)->set_field(field); | |
1427 } | |
1428 } | |
1429 | |
1430 // Fill the cache for next time. | |
1431 ace->_adr_type = adr_type; | |
1432 ace->_index = idx; | |
1433 assert(alias_type(adr_type) == alias_type(idx), "type must be installed"); | |
1434 | |
1435 // Might as well try to fill the cache for the flattened version, too. | |
1436 AliasCacheEntry* face = probe_alias_cache(flat); | |
1437 if (face->_adr_type == NULL) { | |
1438 face->_adr_type = flat; | |
1439 face->_index = idx; | |
1440 assert(alias_type(flat) == alias_type(idx), "flat type must work too"); | |
1441 } | |
1442 | |
1443 return alias_type(idx); | |
1444 } | |
1445 | |
1446 | |
1447 Compile::AliasType* Compile::alias_type(ciField* field) { | |
1448 const TypeOopPtr* t; | |
1449 if (field->is_static()) | |
1450 t = TypeKlassPtr::make(field->holder()); | |
1451 else | |
1452 t = TypeOopPtr::make_from_klass_raw(field->holder()); | |
1453 AliasType* atp = alias_type(t->add_offset(field->offset_in_bytes())); | |
1454 assert(field->is_final() == !atp->is_rewritable(), "must get the rewritable bits correct"); | |
1455 return atp; | |
1456 } | |
1457 | |
1458 | |
1459 //------------------------------have_alias_type-------------------------------- | |
1460 bool Compile::have_alias_type(const TypePtr* adr_type) { | |
1461 AliasCacheEntry* ace = probe_alias_cache(adr_type); | |
1462 if (ace->_adr_type == adr_type) { | |
1463 return true; | |
1464 } | |
1465 | |
1466 // Handle special cases. | |
1467 if (adr_type == NULL) return true; | |
1468 if (adr_type == TypePtr::BOTTOM) return true; | |
1469 | |
1470 return find_alias_type(adr_type, true) != NULL; | |
1471 } | |
1472 | |
1473 //-----------------------------must_alias-------------------------------------- | |
1474 // True if all values of the given address type are in the given alias category. | |
1475 bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) { | |
1476 if (alias_idx == AliasIdxBot) return true; // the universal category | |
1477 if (adr_type == NULL) return true; // NULL serves as TypePtr::TOP | |
1478 if (alias_idx == AliasIdxTop) return false; // the empty category | |
1479 if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins | |
1480 | |
1481 // the only remaining possible overlap is identity | |
1482 int adr_idx = get_alias_index(adr_type); | |
1483 assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, ""); | |
1484 assert(adr_idx == alias_idx || | |
1485 (alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM | |
1486 && adr_type != TypeOopPtr::BOTTOM), | |
1487 "should not be testing for overlap with an unsafe pointer"); | |
1488 return adr_idx == alias_idx; | |
1489 } | |
1490 | |
1491 //------------------------------can_alias-------------------------------------- | |
1492 // True if any values of the given address type are in the given alias category. | |
1493 bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) { | |
1494 if (alias_idx == AliasIdxTop) return false; // the empty category | |
1495 if (adr_type == NULL) return false; // NULL serves as TypePtr::TOP | |
1496 if (alias_idx == AliasIdxBot) return true; // the universal category | |
1497 if (adr_type->base() == Type::AnyPtr) return true; // TypePtr::BOTTOM or its twins | |
1498 | |
1499 // the only remaining possible overlap is identity | |
1500 int adr_idx = get_alias_index(adr_type); | |
1501 assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, ""); | |
1502 return adr_idx == alias_idx; | |
1503 } | |
1504 | |
1505 | |
1506 | |
1507 //---------------------------pop_warm_call------------------------------------- | |
1508 WarmCallInfo* Compile::pop_warm_call() { | |
1509 WarmCallInfo* wci = _warm_calls; | |
1510 if (wci != NULL) _warm_calls = wci->remove_from(wci); | |
1511 return wci; | |
1512 } | |
1513 | |
1514 //----------------------------Inline_Warm-------------------------------------- | |
1515 int Compile::Inline_Warm() { | |
1516 // If there is room, try to inline some more warm call sites. | |
1517 // %%% Do a graph index compaction pass when we think we're out of space? | |
1518 if (!InlineWarmCalls) return 0; | |
1519 | |
1520 int calls_made_hot = 0; | |
1521 int room_to_grow = NodeCountInliningCutoff - unique(); | |
1522 int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep); | |
1523 int amount_grown = 0; | |
1524 WarmCallInfo* call; | |
1525 while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) { | |
1526 int est_size = (int)call->size(); | |
1527 if (est_size > (room_to_grow - amount_grown)) { | |
1528 // This one won't fit anyway. Get rid of it. | |
1529 call->make_cold(); | |
1530 continue; | |
1531 } | |
1532 call->make_hot(); | |
1533 calls_made_hot++; | |
1534 amount_grown += est_size; | |
1535 amount_to_grow -= est_size; | |
1536 } | |
1537 | |
1538 if (calls_made_hot > 0) set_major_progress(); | |
1539 return calls_made_hot; | |
1540 } | |
1541 | |
1542 | |
1543 //----------------------------Finish_Warm-------------------------------------- | |
1544 void Compile::Finish_Warm() { | |
1545 if (!InlineWarmCalls) return; | |
1546 if (failing()) return; | |
1547 if (warm_calls() == NULL) return; | |
1548 | |
1549 // Clean up loose ends, if we are out of space for inlining. | |
1550 WarmCallInfo* call; | |
1551 while ((call = pop_warm_call()) != NULL) { | |
1552 call->make_cold(); | |
1553 } | |
1554 } | |
1555 | |
1556 | |
1557 //------------------------------Optimize--------------------------------------- | |
1558 // Given a graph, optimize it. | |
1559 void Compile::Optimize() { | |
1560 TracePhase t1("optimizer", &_t_optimizer, true); | |
1561 | |
1562 #ifndef PRODUCT | |
1563 if (env()->break_at_compile()) { | |
1564 BREAKPOINT; | |
1565 } | |
1566 | |
1567 #endif | |
1568 | |
1569 ResourceMark rm; | |
1570 int loop_opts_cnt; | |
1571 | |
1572 NOT_PRODUCT( verify_graph_edges(); ) | |
1573 | |
222 | 1574 print_method("After Parsing"); |
0 | 1575 |
1576 { | |
1577 // Iterative Global Value Numbering, including ideal transforms | |
1578 // Initialize IterGVN with types and values from parse-time GVN | |
1579 PhaseIterGVN igvn(initial_gvn()); | |
1580 { | |
1581 NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); ) | |
1582 igvn.optimize(); | |
1583 } | |
1584 | |
1585 print_method("Iter GVN 1", 2); | |
1586 | |
1587 if (failing()) return; | |
1588 | |
1589 // Loop transforms on the ideal graph. Range Check Elimination, | |
1590 // peeling, unrolling, etc. | |
1591 | |
1592 // Set loop opts counter | |
1593 loop_opts_cnt = num_loop_opts(); | |
1594 if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) { | |
1595 { | |
1596 TracePhase t2("idealLoop", &_t_idealLoop, true); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1597 PhaseIdealLoop ideal_loop( igvn, true ); |
0 | 1598 loop_opts_cnt--; |
1599 if (major_progress()) print_method("PhaseIdealLoop 1", 2); | |
1600 if (failing()) return; | |
1601 } | |
1602 // Loop opts pass if partial peeling occurred in previous pass | |
1603 if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) { | |
1604 TracePhase t3("idealLoop", &_t_idealLoop, true); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1605 PhaseIdealLoop ideal_loop( igvn, false ); |
0 | 1606 loop_opts_cnt--; |
1607 if (major_progress()) print_method("PhaseIdealLoop 2", 2); | |
1608 if (failing()) return; | |
1609 } | |
1610 // Loop opts pass for loop-unrolling before CCP | |
1611 if(major_progress() && (loop_opts_cnt > 0)) { | |
1612 TracePhase t4("idealLoop", &_t_idealLoop, true); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1613 PhaseIdealLoop ideal_loop( igvn, false ); |
0 | 1614 loop_opts_cnt--; |
1615 if (major_progress()) print_method("PhaseIdealLoop 3", 2); | |
1616 } | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1617 if (!failing()) { |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1618 // Verify that last round of loop opts produced a valid graph |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1619 NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); ) |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1620 PhaseIdealLoop::verify(igvn); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1621 } |
0 | 1622 } |
1623 if (failing()) return; | |
1624 | |
1625 // Conditional Constant Propagation; | |
1626 PhaseCCP ccp( &igvn ); | |
1627 assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)"); | |
1628 { | |
1629 TracePhase t2("ccp", &_t_ccp, true); | |
1630 ccp.do_transform(); | |
1631 } | |
1632 print_method("PhaseCPP 1", 2); | |
1633 | |
1634 assert( true, "Break here to ccp.dump_old2new_map()"); | |
1635 | |
1636 // Iterative Global Value Numbering, including ideal transforms | |
1637 { | |
1638 NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); ) | |
1639 igvn = ccp; | |
1640 igvn.optimize(); | |
1641 } | |
1642 | |
1643 print_method("Iter GVN 2", 2); | |
1644 | |
1645 if (failing()) return; | |
1646 | |
1647 // Loop transforms on the ideal graph. Range Check Elimination, | |
1648 // peeling, unrolling, etc. | |
1649 if(loop_opts_cnt > 0) { | |
1650 debug_only( int cnt = 0; ); | |
1651 while(major_progress() && (loop_opts_cnt > 0)) { | |
1652 TracePhase t2("idealLoop", &_t_idealLoop, true); | |
1653 assert( cnt++ < 40, "infinite cycle in loop optimization" ); | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1654 PhaseIdealLoop ideal_loop( igvn, true ); |
0 | 1655 loop_opts_cnt--; |
1656 if (major_progress()) print_method("PhaseIdealLoop iterations", 2); | |
1657 if (failing()) return; | |
1658 } | |
1659 } | |
921
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1660 |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1661 { |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1662 // Verify that all previous optimizations produced a valid graph |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1663 // at least to this point, even if no loop optimizations were done. |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1664 NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); ) |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1665 PhaseIdealLoop::verify(igvn); |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1666 } |
046932b72aa2
6862956: PhaseIdealLoop should have a CFG verification mode
never
parents:
859
diff
changeset
|
1667 |
0 | 1668 { |
1669 NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); ) | |
1670 PhaseMacroExpand mex(igvn); | |
1671 if (mex.expand_macro_nodes()) { | |
1672 assert(failing(), "must bail out w/ explicit message"); | |
1673 return; | |
1674 } | |
1675 } | |
1676 | |
1677 } // (End scope of igvn; run destructor if necessary for asserts.) | |
1678 | |
1679 // A method with only infinite loops has no edges entering loops from root | |
1680 { | |
1681 NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); ) | |
1682 if (final_graph_reshaping()) { | |
1683 assert(failing(), "must bail out w/ explicit message"); | |
1684 return; | |
1685 } | |
1686 } | |
1687 | |
1688 print_method("Optimize finished", 2); | |
1689 } | |
1690 | |
1691 | |
1692 //------------------------------Code_Gen--------------------------------------- | |
1693 // Given a graph, generate code for it | |
1694 void Compile::Code_Gen() { | |
1695 if (failing()) return; | |
1696 | |
1697 // Perform instruction selection. You might think we could reclaim Matcher | |
1698 // memory PDQ, but actually the Matcher is used in generating spill code. | |
1699 // Internals of the Matcher (including some VectorSets) must remain live | |
1700 // for awhile - thus I cannot reclaim Matcher memory lest a VectorSet usage | |
1701 // set a bit in reclaimed memory. | |
1702 | |
1703 // In debug mode can dump m._nodes.dump() for mapping of ideal to machine | |
1704 // nodes. Mapping is only valid at the root of each matched subtree. | |
1705 NOT_PRODUCT( verify_graph_edges(); ) | |
1706 | |
1707 Node_List proj_list; | |
1708 Matcher m(proj_list); | |
1709 _matcher = &m; | |
1710 { | |
1711 TracePhase t2("matcher", &_t_matcher, true); | |
1712 m.match(); | |
1713 } | |
1714 // In debug mode can dump m._nodes.dump() for mapping of ideal to machine | |
1715 // nodes. Mapping is only valid at the root of each matched subtree. | |
1716 NOT_PRODUCT( verify_graph_edges(); ) | |
1717 | |
1718 // If you have too many nodes, or if matching has failed, bail out | |
1719 check_node_count(0, "out of nodes matching instructions"); | |
1720 if (failing()) return; | |
1721 | |
1722 // Build a proper-looking CFG | |
1723 PhaseCFG cfg(node_arena(), root(), m); | |
1724 _cfg = &cfg; | |
1725 { | |
1726 NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); ) | |
1727 cfg.Dominators(); | |
1728 if (failing()) return; | |
1729 | |
1730 NOT_PRODUCT( verify_graph_edges(); ) | |
1731 | |
1732 cfg.Estimate_Block_Frequency(); | |
1733 cfg.GlobalCodeMotion(m,unique(),proj_list); | |
1734 | |
1735 print_method("Global code motion", 2); | |
1736 | |
1737 if (failing()) return; | |
1738 NOT_PRODUCT( verify_graph_edges(); ) | |
1739 | |
1740 debug_only( cfg.verify(); ) | |
1741 } | |
1742 NOT_PRODUCT( verify_graph_edges(); ) | |
1743 | |
1744 PhaseChaitin regalloc(unique(),cfg,m); | |
1745 _regalloc = ®alloc; | |
1746 { | |
1747 TracePhase t2("regalloc", &_t_registerAllocation, true); | |
1748 // Perform any platform dependent preallocation actions. This is used, | |
1749 // for example, to avoid taking an implicit null pointer exception | |
1750 // using the frame pointer on win95. | |
1751 _regalloc->pd_preallocate_hook(); | |
1752 | |
1753 // Perform register allocation. After Chaitin, use-def chains are | |
1754 // no longer accurate (at spill code) and so must be ignored. | |
1755 // Node->LRG->reg mappings are still accurate. | |
1756 _regalloc->Register_Allocate(); | |
1757 | |
1758 // Bail out if the allocator builds too many nodes | |
1759 if (failing()) return; | |
1760 } | |
1761 | |
1762 // Prior to register allocation we kept empty basic blocks in case the | |
1763 // the allocator needed a place to spill. After register allocation we | |
1764 // are not adding any new instructions. If any basic block is empty, we | |
1765 // can now safely remove it. | |
1766 { | |
418 | 1767 NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); ) |
1768 cfg.remove_empty(); | |
1769 if (do_freq_based_layout()) { | |
1770 PhaseBlockLayout layout(cfg); | |
1771 } else { | |
1772 cfg.set_loop_alignment(); | |
1773 } | |
1774 cfg.fixup_flow(); | |
0 | 1775 } |
1776 | |
1777 // Perform any platform dependent postallocation verifications. | |
1778 debug_only( _regalloc->pd_postallocate_verify_hook(); ) | |
1779 | |
1780 // Apply peephole optimizations | |
1781 if( OptoPeephole ) { | |
1782 NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); ) | |
1783 PhasePeephole peep( _regalloc, cfg); | |
1784 peep.do_transform(); | |
1785 } | |
1786 | |
1787 // Convert Nodes to instruction bits in a buffer | |
1788 { | |
1789 // %%%% workspace merge brought two timers together for one job | |
1790 TracePhase t2a("output", &_t_output, true); | |
1791 NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); ) | |
1792 Output(); | |
1793 } | |
1794 | |
222 | 1795 print_method("Final Code"); |
0 | 1796 |
1797 // He's dead, Jim. | |
1798 _cfg = (PhaseCFG*)0xdeadbeef; | |
1799 _regalloc = (PhaseChaitin*)0xdeadbeef; | |
1800 } | |
1801 | |
1802 | |
1803 //------------------------------dump_asm--------------------------------------- | |
1804 // Dump formatted assembly | |
1805 #ifndef PRODUCT | |
1806 void Compile::dump_asm(int *pcs, uint pc_limit) { | |
1807 bool cut_short = false; | |
1808 tty->print_cr("#"); | |
1809 tty->print("# "); _tf->dump(); tty->cr(); | |
1810 tty->print_cr("#"); | |
1811 | |
1812 // For all blocks | |
1813 int pc = 0x0; // Program counter | |
1814 char starts_bundle = ' '; | |
1815 _regalloc->dump_frame(); | |
1816 | |
1817 Node *n = NULL; | |
1818 for( uint i=0; i<_cfg->_num_blocks; i++ ) { | |
1819 if (VMThread::should_terminate()) { cut_short = true; break; } | |
1820 Block *b = _cfg->_blocks[i]; | |
1821 if (b->is_connector() && !Verbose) continue; | |
1822 n = b->_nodes[0]; | |
1823 if (pcs && n->_idx < pc_limit) | |
1824 tty->print("%3.3x ", pcs[n->_idx]); | |
1825 else | |
1826 tty->print(" "); | |
1827 b->dump_head( &_cfg->_bbs ); | |
1828 if (b->is_connector()) { | |
1829 tty->print_cr(" # Empty connector block"); | |
1830 } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) { | |
1831 tty->print_cr(" # Block is sole successor of call"); | |
1832 } | |
1833 | |
1834 // For all instructions | |
1835 Node *delay = NULL; | |
1836 for( uint j = 0; j<b->_nodes.size(); j++ ) { | |
1837 if (VMThread::should_terminate()) { cut_short = true; break; } | |
1838 n = b->_nodes[j]; | |
1839 if (valid_bundle_info(n)) { | |
1840 Bundle *bundle = node_bundling(n); | |
1841 if (bundle->used_in_unconditional_delay()) { | |
1842 delay = n; | |
1843 continue; | |
1844 } | |
1845 if (bundle->starts_bundle()) | |
1846 starts_bundle = '+'; | |
1847 } | |
1848 | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1849 if (WizardMode) n->dump(); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1850 |
0 | 1851 if( !n->is_Region() && // Dont print in the Assembly |
1852 !n->is_Phi() && // a few noisely useless nodes | |
1853 !n->is_Proj() && | |
1854 !n->is_MachTemp() && | |
1855 !n->is_Catch() && // Would be nice to print exception table targets | |
1856 !n->is_MergeMem() && // Not very interesting | |
1857 !n->is_top() && // Debug info table constants | |
1858 !(n->is_Con() && !n->is_Mach())// Debug info table constants | |
1859 ) { | |
1860 if (pcs && n->_idx < pc_limit) | |
1861 tty->print("%3.3x", pcs[n->_idx]); | |
1862 else | |
1863 tty->print(" "); | |
1864 tty->print(" %c ", starts_bundle); | |
1865 starts_bundle = ' '; | |
1866 tty->print("\t"); | |
1867 n->format(_regalloc, tty); | |
1868 tty->cr(); | |
1869 } | |
1870 | |
1871 // If we have an instruction with a delay slot, and have seen a delay, | |
1872 // then back up and print it | |
1873 if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) { | |
1874 assert(delay != NULL, "no unconditional delay instruction"); | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1875 if (WizardMode) delay->dump(); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1876 |
0 | 1877 if (node_bundling(delay)->starts_bundle()) |
1878 starts_bundle = '+'; | |
1879 if (pcs && n->_idx < pc_limit) | |
1880 tty->print("%3.3x", pcs[n->_idx]); | |
1881 else | |
1882 tty->print(" "); | |
1883 tty->print(" %c ", starts_bundle); | |
1884 starts_bundle = ' '; | |
1885 tty->print("\t"); | |
1886 delay->format(_regalloc, tty); | |
1887 tty->print_cr(""); | |
1888 delay = NULL; | |
1889 } | |
1890 | |
1891 // Dump the exception table as well | |
1892 if( n->is_Catch() && (Verbose || WizardMode) ) { | |
1893 // Print the exception table for this offset | |
1894 _handler_table.print_subtable_for(pc); | |
1895 } | |
1896 } | |
1897 | |
1898 if (pcs && n->_idx < pc_limit) | |
1899 tty->print_cr("%3.3x", pcs[n->_idx]); | |
1900 else | |
1901 tty->print_cr(""); | |
1902 | |
1903 assert(cut_short || delay == NULL, "no unconditional delay branch"); | |
1904 | |
1905 } // End of per-block dump | |
1906 tty->print_cr(""); | |
1907 | |
1908 if (cut_short) tty->print_cr("*** disassembly is cut short ***"); | |
1909 } | |
1910 #endif | |
1911 | |
1912 //------------------------------Final_Reshape_Counts--------------------------- | |
1913 // This class defines counters to help identify when a method | |
1914 // may/must be executed using hardware with only 24-bit precision. | |
1915 struct Final_Reshape_Counts : public StackObj { | |
1916 int _call_count; // count non-inlined 'common' calls | |
1917 int _float_count; // count float ops requiring 24-bit precision | |
1918 int _double_count; // count double ops requiring more precision | |
1919 int _java_call_count; // count non-inlined 'java' calls | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
1920 int _inner_loop_count; // count loops which need alignment |
0 | 1921 VectorSet _visited; // Visitation flags |
1922 Node_List _tests; // Set of IfNodes & PCTableNodes | |
1923 | |
1924 Final_Reshape_Counts() : | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
1925 _call_count(0), _float_count(0), _double_count(0), |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
1926 _java_call_count(0), _inner_loop_count(0), |
0 | 1927 _visited( Thread::current()->resource_area() ) { } |
1928 | |
1929 void inc_call_count () { _call_count ++; } | |
1930 void inc_float_count () { _float_count ++; } | |
1931 void inc_double_count() { _double_count++; } | |
1932 void inc_java_call_count() { _java_call_count++; } | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
1933 void inc_inner_loop_count() { _inner_loop_count++; } |
0 | 1934 |
1935 int get_call_count () const { return _call_count ; } | |
1936 int get_float_count () const { return _float_count ; } | |
1937 int get_double_count() const { return _double_count; } | |
1938 int get_java_call_count() const { return _java_call_count; } | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
1939 int get_inner_loop_count() const { return _inner_loop_count; } |
0 | 1940 }; |
1941 | |
1942 static bool oop_offset_is_sane(const TypeInstPtr* tp) { | |
1943 ciInstanceKlass *k = tp->klass()->as_instance_klass(); | |
1944 // Make sure the offset goes inside the instance layout. | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
1945 return k->contains_field_offset(tp->offset()); |
0 | 1946 // Note that OffsetBot and OffsetTop are very negative. |
1947 } | |
1948 | |
1949 //------------------------------final_graph_reshaping_impl---------------------- | |
1950 // Implement items 1-5 from final_graph_reshaping below. | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
1951 static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { |
0 | 1952 |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
1953 if ( n->outcnt() == 0 ) return; // dead node |
0 | 1954 uint nop = n->Opcode(); |
1955 | |
1956 // Check for 2-input instruction with "last use" on right input. | |
1957 // Swap to left input. Implements item (2). | |
1958 if( n->req() == 3 && // two-input instruction | |
1959 n->in(1)->outcnt() > 1 && // left use is NOT a last use | |
1960 (!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop | |
1961 n->in(2)->outcnt() == 1 &&// right use IS a last use | |
1962 !n->in(2)->is_Con() ) { // right use is not a constant | |
1963 // Check for commutative opcode | |
1964 switch( nop ) { | |
1965 case Op_AddI: case Op_AddF: case Op_AddD: case Op_AddL: | |
1966 case Op_MaxI: case Op_MinI: | |
1967 case Op_MulI: case Op_MulF: case Op_MulD: case Op_MulL: | |
1968 case Op_AndL: case Op_XorL: case Op_OrL: | |
1969 case Op_AndI: case Op_XorI: case Op_OrI: { | |
1970 // Move "last use" input to left by swapping inputs | |
1971 n->swap_edges(1, 2); | |
1972 break; | |
1973 } | |
1974 default: | |
1975 break; | |
1976 } | |
1977 } | |
1978 | |
1979 // Count FPU ops and common calls, implements item (3) | |
1980 switch( nop ) { | |
1981 // Count all float operations that may use FPU | |
1982 case Op_AddF: | |
1983 case Op_SubF: | |
1984 case Op_MulF: | |
1985 case Op_DivF: | |
1986 case Op_NegF: | |
1987 case Op_ModF: | |
1988 case Op_ConvI2F: | |
1989 case Op_ConF: | |
1990 case Op_CmpF: | |
1991 case Op_CmpF3: | |
1992 // case Op_ConvL2F: // longs are split into 32-bit halves | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
1993 frc.inc_float_count(); |
0 | 1994 break; |
1995 | |
1996 case Op_ConvF2D: | |
1997 case Op_ConvD2F: | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
1998 frc.inc_float_count(); |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
1999 frc.inc_double_count(); |
0 | 2000 break; |
2001 | |
2002 // Count all double operations that may use FPU | |
2003 case Op_AddD: | |
2004 case Op_SubD: | |
2005 case Op_MulD: | |
2006 case Op_DivD: | |
2007 case Op_NegD: | |
2008 case Op_ModD: | |
2009 case Op_ConvI2D: | |
2010 case Op_ConvD2I: | |
2011 // case Op_ConvL2D: // handled by leaf call | |
2012 // case Op_ConvD2L: // handled by leaf call | |
2013 case Op_ConD: | |
2014 case Op_CmpD: | |
2015 case Op_CmpD3: | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2016 frc.inc_double_count(); |
0 | 2017 break; |
2018 case Op_Opaque1: // Remove Opaque Nodes before matching | |
2019 case Op_Opaque2: // Remove Opaque Nodes before matching | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2020 n->subsume_by(n->in(1)); |
0 | 2021 break; |
2022 case Op_CallStaticJava: | |
2023 case Op_CallJava: | |
2024 case Op_CallDynamicJava: | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2025 frc.inc_java_call_count(); // Count java call site; |
0 | 2026 case Op_CallRuntime: |
2027 case Op_CallLeaf: | |
2028 case Op_CallLeafNoFP: { | |
2029 assert( n->is_Call(), "" ); | |
2030 CallNode *call = n->as_Call(); | |
2031 // Count call sites where the FP mode bit would have to be flipped. | |
2032 // Do not count uncommon runtime calls: | |
2033 // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking, | |
2034 // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ... | |
2035 if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) { | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2036 frc.inc_call_count(); // Count the call site |
0 | 2037 } else { // See if uncommon argument is shared |
2038 Node *n = call->in(TypeFunc::Parms); | |
2039 int nop = n->Opcode(); | |
2040 // Clone shared simple arguments to uncommon calls, item (1). | |
2041 if( n->outcnt() > 1 && | |
2042 !n->is_Proj() && | |
2043 nop != Op_CreateEx && | |
2044 nop != Op_CheckCastPP && | |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2045 nop != Op_DecodeN && |
0 | 2046 !n->is_Mem() ) { |
2047 Node *x = n->clone(); | |
2048 call->set_req( TypeFunc::Parms, x ); | |
2049 } | |
2050 } | |
2051 break; | |
2052 } | |
2053 | |
2054 case Op_StoreD: | |
2055 case Op_LoadD: | |
2056 case Op_LoadD_unaligned: | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2057 frc.inc_double_count(); |
0 | 2058 goto handle_mem; |
2059 case Op_StoreF: | |
2060 case Op_LoadF: | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2061 frc.inc_float_count(); |
0 | 2062 goto handle_mem; |
2063 | |
2064 case Op_StoreB: | |
2065 case Op_StoreC: | |
2066 case Op_StoreCM: | |
2067 case Op_StorePConditional: | |
2068 case Op_StoreI: | |
2069 case Op_StoreL: | |
420
a1980da045cc
6462850: generate biased locking code in C2 ideal graph
kvn
parents:
418
diff
changeset
|
2070 case Op_StoreIConditional: |
0 | 2071 case Op_StoreLConditional: |
2072 case Op_CompareAndSwapI: | |
2073 case Op_CompareAndSwapL: | |
2074 case Op_CompareAndSwapP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
2075 case Op_CompareAndSwapN: |
0 | 2076 case Op_StoreP: |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
2077 case Op_StoreN: |
0 | 2078 case Op_LoadB: |
624 | 2079 case Op_LoadUB: |
558
3b5ac9e7e6ea
6796746: rename LoadC (char) opcode class to LoadUS (unsigned short)
twisti
parents:
492
diff
changeset
|
2080 case Op_LoadUS: |
0 | 2081 case Op_LoadI: |
624 | 2082 case Op_LoadUI2L: |
0 | 2083 case Op_LoadKlass: |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2084 case Op_LoadNKlass: |
0 | 2085 case Op_LoadL: |
2086 case Op_LoadL_unaligned: | |
2087 case Op_LoadPLocked: | |
2088 case Op_LoadLLocked: | |
2089 case Op_LoadP: | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
100
diff
changeset
|
2090 case Op_LoadN: |
0 | 2091 case Op_LoadRange: |
2092 case Op_LoadS: { | |
2093 handle_mem: | |
2094 #ifdef ASSERT | |
2095 if( VerifyOptoOopOffsets ) { | |
2096 assert( n->is_Mem(), "" ); | |
2097 MemNode *mem = (MemNode*)n; | |
2098 // Check to see if address types have grounded out somehow. | |
2099 const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr(); | |
2100 assert( !tp || oop_offset_is_sane(tp), "" ); | |
2101 } | |
2102 #endif | |
2103 break; | |
2104 } | |
2105 | |
2106 case Op_AddP: { // Assert sane base pointers | |
182
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2107 Node *addp = n->in(AddPNode::Address); |
0 | 2108 assert( !addp->is_AddP() || |
2109 addp->in(AddPNode::Base)->is_top() || // Top OK for allocation | |
2110 addp->in(AddPNode::Base) == n->in(AddPNode::Base), | |
2111 "Base pointers must match" ); | |
182
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2112 #ifdef _LP64 |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2113 if (UseCompressedOops && |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2114 addp->Opcode() == Op_ConP && |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2115 addp == n->in(AddPNode::Base) && |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2116 n->in(AddPNode::Offset)->is_Con()) { |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2117 // Use addressing with narrow klass to load with offset on x86. |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2118 // On sparc loading 32-bits constant and decoding it have less |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2119 // instructions (4) then load 64-bits constant (7). |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2120 // Do this transformation here since IGVN will convert ConN back to ConP. |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2121 const Type* t = addp->bottom_type(); |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2122 if (t->isa_oopptr()) { |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2123 Node* nn = NULL; |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2124 |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2125 // Look for existing ConN node of the same exact type. |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2126 Compile* C = Compile::current(); |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2127 Node* r = C->root(); |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2128 uint cnt = r->outcnt(); |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2129 for (uint i = 0; i < cnt; i++) { |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2130 Node* m = r->raw_out(i); |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2131 if (m!= NULL && m->Opcode() == Op_ConN && |
221
1e026f8da827
6710487: More than half of JDI Regression tests hang with COOPs in -Xcomp mode
kvn
parents:
182
diff
changeset
|
2132 m->bottom_type()->make_ptr() == t) { |
182
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2133 nn = m; |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2134 break; |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2135 } |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2136 } |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2137 if (nn != NULL) { |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2138 // Decode a narrow oop to match address |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2139 // [R12 + narrow_oop_reg<<3 + offset] |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2140 nn = new (C, 2) DecodeNNode(nn, t); |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2141 n->set_req(AddPNode::Base, nn); |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2142 n->set_req(AddPNode::Address, nn); |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2143 if (addp->outcnt() == 0) { |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2144 addp->disconnect_inputs(NULL); |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2145 } |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2146 } |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2147 } |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2148 } |
44abbb0d4c18
6709093: Compressed Oops: reduce size of compiled methods
kvn
parents:
168
diff
changeset
|
2149 #endif |
0 | 2150 break; |
2151 } | |
2152 | |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2153 #ifdef _LP64 |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2154 case Op_CastPP: |
642
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
624
diff
changeset
|
2155 if (n->in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks()) { |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2156 Compile* C = Compile::current(); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2157 Node* in1 = n->in(1); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2158 const Type* t = n->bottom_type(); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2159 Node* new_in1 = in1->clone(); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2160 new_in1->as_DecodeN()->set_type(t); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2161 |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2162 if (!Matcher::clone_shift_expressions) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2163 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2164 // x86, ARM and friends can handle 2 adds in addressing mode |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2165 // and Matcher can fold a DecodeN node into address by using |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2166 // a narrow oop directly and do implicit NULL check in address: |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2167 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2168 // [R12 + narrow_oop_reg<<3 + offset] |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2169 // NullCheck narrow_oop_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2170 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2171 // On other platforms (Sparc) we have to keep new DecodeN node and |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2172 // use it to do implicit NULL check in address: |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2173 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2174 // decode_not_null narrow_oop_reg, base_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2175 // [base_reg + offset] |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2176 // NullCheck base_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2177 // |
605 | 2178 // Pin the new DecodeN node to non-null path on these platform (Sparc) |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2179 // to keep the information to which NULL check the new DecodeN node |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2180 // corresponds to use it as value in implicit_null_check(). |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2181 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2182 new_in1->set_req(0, n->in(0)); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2183 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2184 |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2185 n->subsume_by(new_in1); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2186 if (in1->outcnt() == 0) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2187 in1->disconnect_inputs(NULL); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2188 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2189 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2190 break; |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2191 |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2192 case Op_CmpP: |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2193 // Do this transformation here to preserve CmpPNode::sub() and |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2194 // other TypePtr related Ideal optimizations (for example, ptr nullness). |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2195 if (n->in(1)->is_DecodeN() || n->in(2)->is_DecodeN()) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2196 Node* in1 = n->in(1); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2197 Node* in2 = n->in(2); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2198 if (!in1->is_DecodeN()) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2199 in2 = in1; |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2200 in1 = n->in(2); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2201 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2202 assert(in1->is_DecodeN(), "sanity"); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2203 |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2204 Compile* C = Compile::current(); |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2205 Node* new_in2 = NULL; |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2206 if (in2->is_DecodeN()) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2207 new_in2 = in2->in(1); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2208 } else if (in2->Opcode() == Op_ConP) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2209 const Type* t = in2->bottom_type(); |
642
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
624
diff
changeset
|
2210 if (t == TypePtr::NULL_PTR && Universe::narrow_oop_use_implicit_null_checks()) { |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2211 new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2212 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2213 // This transformation together with CastPP transformation above |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2214 // will generated code for implicit NULL checks for compressed oops. |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2215 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2216 // The original code after Optimize() |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2217 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2218 // LoadN memory, narrow_oop_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2219 // decode narrow_oop_reg, base_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2220 // CmpP base_reg, NULL |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2221 // CastPP base_reg // NotNull |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2222 // Load [base_reg + offset], val_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2223 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2224 // after these transformations will be |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2225 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2226 // LoadN memory, narrow_oop_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2227 // CmpN narrow_oop_reg, NULL |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2228 // decode_not_null narrow_oop_reg, base_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2229 // Load [base_reg + offset], val_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2230 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2231 // and the uncommon path (== NULL) will use narrow_oop_reg directly |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2232 // since narrow oops can be used in debug info now (see the code in |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2233 // final_graph_reshaping_walk()). |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2234 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2235 // At the end the code will be matched to |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2236 // on x86: |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2237 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2238 // Load_narrow_oop memory, narrow_oop_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2239 // Load [R12 + narrow_oop_reg<<3 + offset], val_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2240 // NullCheck narrow_oop_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2241 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2242 // and on sparc: |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2243 // |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2244 // Load_narrow_oop memory, narrow_oop_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2245 // decode_not_null narrow_oop_reg, base_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2246 // Load [base_reg + offset], val_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2247 // NullCheck base_reg |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2248 // |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2249 } else if (t->isa_oopptr()) { |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2250 new_in2 = ConNode::make(C, t->make_narrowoop()); |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2251 } |
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2252 } |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2253 if (new_in2 != NULL) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2254 Node* cmpN = new (C, 3) CmpNNode(in1->in(1), new_in2); |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2255 n->subsume_by( cmpN ); |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2256 if (in1->outcnt() == 0) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2257 in1->disconnect_inputs(NULL); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2258 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2259 if (in2->outcnt() == 0) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2260 in2->disconnect_inputs(NULL); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2261 } |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2262 } |
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2263 } |
293
c3e045194476
6731641: assert(m->adr_type() == mach->adr_type(),"matcher should not change adr type")
kvn
parents:
253
diff
changeset
|
2264 break; |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2265 |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2266 case Op_DecodeN: |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2267 assert(!n->in(1)->is_EncodeP(), "should be optimized out"); |
492
5496e074077f
6787050: assert(n->in(0) == 0L,"no control") with UseCompressedOops on sparcv9
kvn
parents:
490
diff
changeset
|
2268 // DecodeN could be pinned on Sparc where it can't be fold into |
5496e074077f
6787050: assert(n->in(0) == 0L,"no control") with UseCompressedOops on sparcv9
kvn
parents:
490
diff
changeset
|
2269 // an address expression, see the code for Op_CastPP above. |
5496e074077f
6787050: assert(n->in(0) == 0L,"no control") with UseCompressedOops on sparcv9
kvn
parents:
490
diff
changeset
|
2270 assert(n->in(0) == NULL || !Matcher::clone_shift_expressions, "no control except on sparc"); |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2271 break; |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2272 |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2273 case Op_EncodeP: { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2274 Node* in1 = n->in(1); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2275 if (in1->is_DecodeN()) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2276 n->subsume_by(in1->in(1)); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2277 } else if (in1->Opcode() == Op_ConP) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2278 Compile* C = Compile::current(); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2279 const Type* t = in1->bottom_type(); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2280 if (t == TypePtr::NULL_PTR) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2281 n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR)); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2282 } else if (t->isa_oopptr()) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2283 n->subsume_by(ConNode::make(C, t->make_narrowoop())); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2284 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2285 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2286 if (in1->outcnt() == 0) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2287 in1->disconnect_inputs(NULL); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2288 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2289 break; |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2290 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2291 |
1080
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2292 case Op_Proj: { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2293 if (OptimizeStringConcat) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2294 ProjNode* p = n->as_Proj(); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2295 if (p->_is_io_use) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2296 // Separate projections were used for the exception path which |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2297 // are normally removed by a late inline. If it wasn't inlined |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2298 // then they will hang around and should just be replaced with |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2299 // the original one. |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2300 Node* proj = NULL; |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2301 // Replace with just one |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2302 for (SimpleDUIterator i(p->in(0)); i.has_next(); i.next()) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2303 Node *use = i.get(); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2304 if (use->is_Proj() && p != use && use->as_Proj()->_con == p->_con) { |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2305 proj = use; |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2306 break; |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2307 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2308 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2309 assert(p != NULL, "must be found"); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2310 p->subsume_by(proj); |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2311 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2312 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2313 break; |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2314 } |
7c57aead6d3e
6892658: C2 should optimize some stringbuilder patterns
never
parents:
929
diff
changeset
|
2315 |
368
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2316 case Op_Phi: |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2317 if (n->as_Phi()->bottom_type()->isa_narrowoop()) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2318 // The EncodeP optimization may create Phi with the same edges |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2319 // for all paths. It is not handled well by Register Allocator. |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2320 Node* unique_in = n->in(1); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2321 assert(unique_in != NULL, ""); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2322 uint cnt = n->req(); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2323 for (uint i = 2; i < cnt; i++) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2324 Node* m = n->in(i); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2325 assert(m != NULL, ""); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2326 if (unique_in != m) |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2327 unique_in = NULL; |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2328 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2329 if (unique_in != NULL) { |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2330 n->subsume_by(unique_in); |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2331 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2332 } |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2333 break; |
36ccc817fca4
6747051: Improve code and implicit null check generation for compressed oops
kvn
parents:
367
diff
changeset
|
2334 |
164
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2335 #endif |
c436414a719e
6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions
kvn
parents:
163
diff
changeset
|
2336 |
0 | 2337 case Op_ModI: |
2338 if (UseDivMod) { | |
2339 // Check if a%b and a/b both exist | |
2340 Node* d = n->find_similar(Op_DivI); | |
2341 if (d) { | |
2342 // Replace them with a fused divmod if supported | |
2343 Compile* C = Compile::current(); | |
2344 if (Matcher::has_match_rule(Op_DivModI)) { | |
2345 DivModINode* divmod = DivModINode::make(C, n); | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2346 d->subsume_by(divmod->div_proj()); |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2347 n->subsume_by(divmod->mod_proj()); |
0 | 2348 } else { |
2349 // replace a%b with a-((a/b)*b) | |
2350 Node* mult = new (C, 3) MulINode(d, d->in(2)); | |
2351 Node* sub = new (C, 3) SubINode(d->in(1), mult); | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2352 n->subsume_by( sub ); |
0 | 2353 } |
2354 } | |
2355 } | |
2356 break; | |
2357 | |
2358 case Op_ModL: | |
2359 if (UseDivMod) { | |
2360 // Check if a%b and a/b both exist | |
2361 Node* d = n->find_similar(Op_DivL); | |
2362 if (d) { | |
2363 // Replace them with a fused divmod if supported | |
2364 Compile* C = Compile::current(); | |
2365 if (Matcher::has_match_rule(Op_DivModL)) { | |
2366 DivModLNode* divmod = DivModLNode::make(C, n); | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2367 d->subsume_by(divmod->div_proj()); |
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2368 n->subsume_by(divmod->mod_proj()); |
0 | 2369 } else { |
2370 // replace a%b with a-((a/b)*b) | |
2371 Node* mult = new (C, 3) MulLNode(d, d->in(2)); | |
2372 Node* sub = new (C, 3) SubLNode(d->in(1), mult); | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2373 n->subsume_by( sub ); |
0 | 2374 } |
2375 } | |
2376 } | |
2377 break; | |
2378 | |
2379 case Op_Load16B: | |
2380 case Op_Load8B: | |
2381 case Op_Load4B: | |
2382 case Op_Load8S: | |
2383 case Op_Load4S: | |
2384 case Op_Load2S: | |
2385 case Op_Load8C: | |
2386 case Op_Load4C: | |
2387 case Op_Load2C: | |
2388 case Op_Load4I: | |
2389 case Op_Load2I: | |
2390 case Op_Load2L: | |
2391 case Op_Load4F: | |
2392 case Op_Load2F: | |
2393 case Op_Load2D: | |
2394 case Op_Store16B: | |
2395 case Op_Store8B: | |
2396 case Op_Store4B: | |
2397 case Op_Store8C: | |
2398 case Op_Store4C: | |
2399 case Op_Store2C: | |
2400 case Op_Store4I: | |
2401 case Op_Store2I: | |
2402 case Op_Store2L: | |
2403 case Op_Store4F: | |
2404 case Op_Store2F: | |
2405 case Op_Store2D: | |
2406 break; | |
2407 | |
2408 case Op_PackB: | |
2409 case Op_PackS: | |
2410 case Op_PackC: | |
2411 case Op_PackI: | |
2412 case Op_PackF: | |
2413 case Op_PackL: | |
2414 case Op_PackD: | |
2415 if (n->req()-1 > 2) { | |
2416 // Replace many operand PackNodes with a binary tree for matching | |
2417 PackNode* p = (PackNode*) n; | |
2418 Node* btp = p->binaryTreePack(Compile::current(), 1, n->req()); | |
168
7793bd37a336
6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
kvn
parents:
164
diff
changeset
|
2419 n->subsume_by(btp); |
0 | 2420 } |
2421 break; | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2422 case Op_Loop: |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2423 case Op_CountedLoop: |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2424 if (n->as_Loop()->is_inner_loop()) { |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2425 frc.inc_inner_loop_count(); |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2426 } |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2427 break; |
0 | 2428 default: |
2429 assert( !n->is_Call(), "" ); | |
2430 assert( !n->is_Mem(), "" ); | |
2431 break; | |
2432 } | |
127 | 2433 |
2434 // Collect CFG split points | |
2435 if (n->is_MultiBranch()) | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2436 frc._tests.push(n); |
0 | 2437 } |
2438 | |
2439 //------------------------------final_graph_reshaping_walk--------------------- | |
2440 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(), | |
2441 // requires that the walk visits a node's inputs before visiting the node. | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2442 static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) { |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2443 ResourceArea *area = Thread::current()->resource_area(); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2444 Unique_Node_List sfpt(area); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2445 |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2446 frc._visited.set(root->_idx); // first, mark node as visited |
0 | 2447 uint cnt = root->req(); |
2448 Node *n = root; | |
2449 uint i = 0; | |
2450 while (true) { | |
2451 if (i < cnt) { | |
2452 // Place all non-visited non-null inputs onto stack | |
2453 Node* m = n->in(i); | |
2454 ++i; | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2455 if (m != NULL && !frc._visited.test_set(m->_idx)) { |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2456 if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2457 sfpt.push(m); |
0 | 2458 cnt = m->req(); |
2459 nstack.push(n, i); // put on stack parent and next input's index | |
2460 n = m; | |
2461 i = 0; | |
2462 } | |
2463 } else { | |
2464 // Now do post-visit work | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2465 final_graph_reshaping_impl( n, frc ); |
0 | 2466 if (nstack.is_empty()) |
2467 break; // finished | |
2468 n = nstack.node(); // Get node from stack | |
2469 cnt = n->req(); | |
2470 i = nstack.index(); | |
2471 nstack.pop(); // Shift to the next node on stack | |
2472 } | |
2473 } | |
331
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2474 |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2475 // Go over safepoints nodes to skip DecodeN nodes for debug edges. |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2476 // It could be done for an uncommon traps or any safepoints/calls |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2477 // if the DecodeN node is referenced only in a debug info. |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2478 while (sfpt.size() > 0) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2479 n = sfpt.pop(); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2480 JVMState *jvms = n->as_SafePoint()->jvms(); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2481 assert(jvms != NULL, "sanity"); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2482 int start = jvms->debug_start(); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2483 int end = n->req(); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2484 bool is_uncommon = (n->is_CallStaticJava() && |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2485 n->as_CallStaticJava()->uncommon_trap_request() != 0); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2486 for (int j = start; j < end; j++) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2487 Node* in = n->in(j); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2488 if (in->is_DecodeN()) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2489 bool safe_to_skip = true; |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2490 if (!is_uncommon ) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2491 // Is it safe to skip? |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2492 for (uint i = 0; i < in->outcnt(); i++) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2493 Node* u = in->raw_out(i); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2494 if (!u->is_SafePoint() || |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2495 u->is_Call() && u->as_Call()->has_non_debug_use(n)) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2496 safe_to_skip = false; |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2497 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2498 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2499 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2500 if (safe_to_skip) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2501 n->set_req(j, in->in(1)); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2502 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2503 if (in->outcnt() == 0) { |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2504 in->disconnect_inputs(NULL); |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2505 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2506 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2507 } |
cecd8eb4e0ca
6706829: Compressed Oops: add debug info for narrow oops
kvn
parents:
293
diff
changeset
|
2508 } |
0 | 2509 } |
2510 | |
2511 //------------------------------final_graph_reshaping-------------------------- | |
2512 // Final Graph Reshaping. | |
2513 // | |
2514 // (1) Clone simple inputs to uncommon calls, so they can be scheduled late | |
2515 // and not commoned up and forced early. Must come after regular | |
2516 // optimizations to avoid GVN undoing the cloning. Clone constant | |
2517 // inputs to Loop Phis; these will be split by the allocator anyways. | |
2518 // Remove Opaque nodes. | |
2519 // (2) Move last-uses by commutative operations to the left input to encourage | |
2520 // Intel update-in-place two-address operations and better register usage | |
2521 // on RISCs. Must come after regular optimizations to avoid GVN Ideal | |
2522 // calls canonicalizing them back. | |
2523 // (3) Count the number of double-precision FP ops, single-precision FP ops | |
2524 // and call sites. On Intel, we can get correct rounding either by | |
2525 // forcing singles to memory (requires extra stores and loads after each | |
2526 // FP bytecode) or we can set a rounding mode bit (requires setting and | |
2527 // clearing the mode bit around call sites). The mode bit is only used | |
2528 // if the relative frequency of single FP ops to calls is low enough. | |
2529 // This is a key transform for SPEC mpeg_audio. | |
2530 // (4) Detect infinite loops; blobs of code reachable from above but not | |
2531 // below. Several of the Code_Gen algorithms fail on such code shapes, | |
2532 // so we simply bail out. Happens a lot in ZKM.jar, but also happens | |
2533 // from time to time in other codes (such as -Xcomp finalizer loops, etc). | |
2534 // Detection is by looking for IfNodes where only 1 projection is | |
2535 // reachable from below or CatchNodes missing some targets. | |
2536 // (5) Assert for insane oop offsets in debug mode. | |
2537 | |
2538 bool Compile::final_graph_reshaping() { | |
2539 // an infinite loop may have been eliminated by the optimizer, | |
2540 // in which case the graph will be empty. | |
2541 if (root()->req() == 1) { | |
2542 record_method_not_compilable("trivial infinite loop"); | |
2543 return true; | |
2544 } | |
2545 | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2546 Final_Reshape_Counts frc; |
0 | 2547 |
2548 // Visit everybody reachable! | |
2549 // Allocate stack of size C->unique()/2 to avoid frequent realloc | |
2550 Node_Stack nstack(unique() >> 1); | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2551 final_graph_reshaping_walk(nstack, root(), frc); |
0 | 2552 |
2553 // Check for unreachable (from below) code (i.e., infinite loops). | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2554 for( uint i = 0; i < frc._tests.size(); i++ ) { |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2555 MultiBranchNode *n = frc._tests[i]->as_MultiBranch(); |
127 | 2556 // Get number of CFG targets. |
0 | 2557 // Note that PCTables include exception targets after calls. |
127 | 2558 uint required_outcnt = n->required_outcnt(); |
2559 if (n->outcnt() != required_outcnt) { | |
0 | 2560 // Check for a few special cases. Rethrow Nodes never take the |
2561 // 'fall-thru' path, so expected kids is 1 less. | |
2562 if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) { | |
2563 if (n->in(0)->in(0)->is_Call()) { | |
2564 CallNode *call = n->in(0)->in(0)->as_Call(); | |
2565 if (call->entry_point() == OptoRuntime::rethrow_stub()) { | |
127 | 2566 required_outcnt--; // Rethrow always has 1 less kid |
0 | 2567 } else if (call->req() > TypeFunc::Parms && |
2568 call->is_CallDynamicJava()) { | |
2569 // Check for null receiver. In such case, the optimizer has | |
2570 // detected that the virtual call will always result in a null | |
2571 // pointer exception. The fall-through projection of this CatchNode | |
2572 // will not be populated. | |
2573 Node *arg0 = call->in(TypeFunc::Parms); | |
2574 if (arg0->is_Type() && | |
2575 arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) { | |
127 | 2576 required_outcnt--; |
0 | 2577 } |
2578 } else if (call->entry_point() == OptoRuntime::new_array_Java() && | |
2579 call->req() > TypeFunc::Parms+1 && | |
2580 call->is_CallStaticJava()) { | |
2581 // Check for negative array length. In such case, the optimizer has | |
2582 // detected that the allocation attempt will always result in an | |
2583 // exception. There is no fall-through projection of this CatchNode . | |
2584 Node *arg1 = call->in(TypeFunc::Parms+1); | |
2585 if (arg1->is_Type() && | |
2586 arg1->as_Type()->type()->join(TypeInt::POS)->empty()) { | |
127 | 2587 required_outcnt--; |
0 | 2588 } |
2589 } | |
2590 } | |
2591 } | |
127 | 2592 // Recheck with a better notion of 'required_outcnt' |
2593 if (n->outcnt() != required_outcnt) { | |
0 | 2594 record_method_not_compilable("malformed control flow"); |
2595 return true; // Not all targets reachable! | |
2596 } | |
2597 } | |
2598 // Check that I actually visited all kids. Unreached kids | |
2599 // must be infinite loops. | |
2600 for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2601 if (!frc._visited.test(n->fast_out(j)->_idx)) { |
0 | 2602 record_method_not_compilable("infinite loop"); |
2603 return true; // Found unvisited kid; must be unreach | |
2604 } | |
2605 } | |
2606 | |
2607 // If original bytecodes contained a mixture of floats and doubles | |
2608 // check if the optimizer has made it homogenous, item (3). | |
929
cd18bd5e667c
6873777: FPU control word optimization still performed with SSE
never
parents:
921
diff
changeset
|
2609 if( Use24BitFPMode && Use24BitFP && UseSSE == 0 && |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2610 frc.get_float_count() > 32 && |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2611 frc.get_double_count() == 0 && |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2612 (10 * frc.get_call_count() < frc.get_float_count()) ) { |
0 | 2613 set_24_bit_selection_and_mode( false, true ); |
2614 } | |
2615 | |
859
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2616 set_java_calls(frc.get_java_call_count()); |
ea3f9723b5cf
6860599: nodes limit could be reached during Output phase
kvn
parents:
856
diff
changeset
|
2617 set_inner_loops(frc.get_inner_loop_count()); |
0 | 2618 |
2619 // No infinite loops, no reason to bail out. | |
2620 return false; | |
2621 } | |
2622 | |
2623 //-----------------------------too_many_traps---------------------------------- | |
2624 // Report if there are too many traps at the current method and bci. | |
2625 // Return true if there was a trap, and/or PerMethodTrapLimit is exceeded. | |
2626 bool Compile::too_many_traps(ciMethod* method, | |
2627 int bci, | |
2628 Deoptimization::DeoptReason reason) { | |
2629 ciMethodData* md = method->method_data(); | |
2630 if (md->is_empty()) { | |
2631 // Assume the trap has not occurred, or that it occurred only | |
2632 // because of a transient condition during start-up in the interpreter. | |
2633 return false; | |
2634 } | |
2635 if (md->has_trap_at(bci, reason) != 0) { | |
2636 // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic. | |
2637 // Also, if there are multiple reasons, or if there is no per-BCI record, | |
2638 // assume the worst. | |
2639 if (log()) | |
2640 log()->elem("observe trap='%s' count='%d'", | |
2641 Deoptimization::trap_reason_name(reason), | |
2642 md->trap_count(reason)); | |
2643 return true; | |
2644 } else { | |
2645 // Ignore method/bci and see if there have been too many globally. | |
2646 return too_many_traps(reason, md); | |
2647 } | |
2648 } | |
2649 | |
2650 // Less-accurate variant which does not require a method and bci. | |
2651 bool Compile::too_many_traps(Deoptimization::DeoptReason reason, | |
2652 ciMethodData* logmd) { | |
2653 if (trap_count(reason) >= (uint)PerMethodTrapLimit) { | |
2654 // Too many traps globally. | |
2655 // Note that we use cumulative trap_count, not just md->trap_count. | |
2656 if (log()) { | |
2657 int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason); | |
2658 log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'", | |
2659 Deoptimization::trap_reason_name(reason), | |
2660 mcount, trap_count(reason)); | |
2661 } | |
2662 return true; | |
2663 } else { | |
2664 // The coast is clear. | |
2665 return false; | |
2666 } | |
2667 } | |
2668 | |
2669 //--------------------------too_many_recompiles-------------------------------- | |
2670 // Report if there are too many recompiles at the current method and bci. | |
2671 // Consults PerBytecodeRecompilationCutoff and PerMethodRecompilationCutoff. | |
2672 // Is not eager to return true, since this will cause the compiler to use | |
2673 // Action_none for a trap point, to avoid too many recompilations. | |
2674 bool Compile::too_many_recompiles(ciMethod* method, | |
2675 int bci, | |
2676 Deoptimization::DeoptReason reason) { | |
2677 ciMethodData* md = method->method_data(); | |
2678 if (md->is_empty()) { | |
2679 // Assume the trap has not occurred, or that it occurred only | |
2680 // because of a transient condition during start-up in the interpreter. | |
2681 return false; | |
2682 } | |
2683 // Pick a cutoff point well within PerBytecodeRecompilationCutoff. | |
2684 uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8; | |
2685 uint m_cutoff = (uint) PerMethodRecompilationCutoff / 2 + 1; // not zero | |
2686 Deoptimization::DeoptReason per_bc_reason | |
2687 = Deoptimization::reason_recorded_per_bytecode_if_any(reason); | |
2688 if ((per_bc_reason == Deoptimization::Reason_none | |
2689 || md->has_trap_at(bci, reason) != 0) | |
2690 // The trap frequency measure we care about is the recompile count: | |
2691 && md->trap_recompiled_at(bci) | |
2692 && md->overflow_recompile_count() >= bc_cutoff) { | |
2693 // Do not emit a trap here if it has already caused recompilations. | |
2694 // Also, if there are multiple reasons, or if there is no per-BCI record, | |
2695 // assume the worst. | |
2696 if (log()) | |
2697 log()->elem("observe trap='%s recompiled' count='%d' recompiles2='%d'", | |
2698 Deoptimization::trap_reason_name(reason), | |
2699 md->trap_count(reason), | |
2700 md->overflow_recompile_count()); | |
2701 return true; | |
2702 } else if (trap_count(reason) != 0 | |
2703 && decompile_count() >= m_cutoff) { | |
2704 // Too many recompiles globally, and we have seen this sort of trap. | |
2705 // Use cumulative decompile_count, not just md->decompile_count. | |
2706 if (log()) | |
2707 log()->elem("observe trap='%s' count='%d' mcount='%d' decompiles='%d' mdecompiles='%d'", | |
2708 Deoptimization::trap_reason_name(reason), | |
2709 md->trap_count(reason), trap_count(reason), | |
2710 md->decompile_count(), decompile_count()); | |
2711 return true; | |
2712 } else { | |
2713 // The coast is clear. | |
2714 return false; | |
2715 } | |
2716 } | |
2717 | |
2718 | |
2719 #ifndef PRODUCT | |
2720 //------------------------------verify_graph_edges--------------------------- | |
2721 // Walk the Graph and verify that there is a one-to-one correspondence | |
2722 // between Use-Def edges and Def-Use edges in the graph. | |
2723 void Compile::verify_graph_edges(bool no_dead_code) { | |
2724 if (VerifyGraphEdges) { | |
2725 ResourceArea *area = Thread::current()->resource_area(); | |
2726 Unique_Node_List visited(area); | |
2727 // Call recursive graph walk to check edges | |
2728 _root->verify_edges(visited); | |
2729 if (no_dead_code) { | |
2730 // Now make sure that no visited node is used by an unvisited node. | |
2731 bool dead_nodes = 0; | |
2732 Unique_Node_List checked(area); | |
2733 while (visited.size() > 0) { | |
2734 Node* n = visited.pop(); | |
2735 checked.push(n); | |
2736 for (uint i = 0; i < n->outcnt(); i++) { | |
2737 Node* use = n->raw_out(i); | |
2738 if (checked.member(use)) continue; // already checked | |
2739 if (visited.member(use)) continue; // already in the graph | |
2740 if (use->is_Con()) continue; // a dead ConNode is OK | |
2741 // At this point, we have found a dead node which is DU-reachable. | |
2742 if (dead_nodes++ == 0) | |
2743 tty->print_cr("*** Dead nodes reachable via DU edges:"); | |
2744 use->dump(2); | |
2745 tty->print_cr("---"); | |
2746 checked.push(use); // No repeats; pretend it is now checked. | |
2747 } | |
2748 } | |
2749 assert(dead_nodes == 0, "using nodes must be reachable from root"); | |
2750 } | |
2751 } | |
2752 } | |
2753 #endif | |
2754 | |
2755 // The Compile object keeps track of failure reasons separately from the ciEnv. | |
2756 // This is required because there is not quite a 1-1 relation between the | |
2757 // ciEnv and its compilation task and the Compile object. Note that one | |
2758 // ciEnv might use two Compile objects, if C2Compiler::compile_method decides | |
2759 // to backtrack and retry without subsuming loads. Other than this backtracking | |
2760 // behavior, the Compile's failure reason is quietly copied up to the ciEnv | |
2761 // by the logic in C2Compiler. | |
2762 void Compile::record_failure(const char* reason) { | |
2763 if (log() != NULL) { | |
2764 log()->elem("failure reason='%s' phase='compile'", reason); | |
2765 } | |
2766 if (_failure_reason == NULL) { | |
2767 // Record the first failure reason. | |
2768 _failure_reason = reason; | |
2769 } | |
222 | 2770 if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { |
2771 C->print_method(_failure_reason); | |
2772 } | |
0 | 2773 _root = NULL; // flush the graph, too |
2774 } | |
2775 | |
2776 Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog) | |
2777 : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false) | |
2778 { | |
2779 if (dolog) { | |
2780 C = Compile::current(); | |
2781 _log = C->log(); | |
2782 } else { | |
2783 C = NULL; | |
2784 _log = NULL; | |
2785 } | |
2786 if (_log != NULL) { | |
2787 _log->begin_head("phase name='%s' nodes='%d'", name, C->unique()); | |
2788 _log->stamp(); | |
2789 _log->end_head(); | |
2790 } | |
2791 } | |
2792 | |
2793 Compile::TracePhase::~TracePhase() { | |
2794 if (_log != NULL) { | |
2795 _log->done("phase nodes='%d'", C->unique()); | |
2796 } | |
2797 } |