truffle: src/share/vm/opto/compile.cpp comparison

comparison src/share/vm/opto/compile.cpp @ 18041:52b4284cb496

Merge with jdk8u20-b26

author	Gilles Duboscq <duboscq@ssw.jku.at>
date	Wed, 15 Oct 2014 16:02:50 +0200
parents	4ca6dc0799b6 0bf37f737702
children	8b7a143aea6b

comparison

equal deleted inserted replaced

-:45d7b2c7029d
+:52b4284cb496
 /*
-* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 */
 #include "precompiled.hpp"
 #include "asm/macroAssembler.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "ci/ciReplay.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "code/exceptionHandlerTable.hpp"
 #include "code/nmethod.hpp"
 #include "compiler/compileLog.hpp"
 #include "compiler/disassembler.hpp"
 # include "adfiles/ad_zero.hpp"
 #endif
 #ifdef TARGET_ARCH_MODEL_arm
 # include "adfiles/ad_arm.hpp"
 #endif
-#ifdef TARGET_ARCH_MODEL_ppc
+#ifdef TARGET_ARCH_MODEL_ppc_32
-# include "adfiles/ad_ppc.hpp"
+# include "adfiles/ad_ppc_32.hpp"
+#endif
+#ifdef TARGET_ARCH_MODEL_ppc_64
+# include "adfiles/ad_ppc_64.hpp"
 #endif
 // -------------------- Compile::mach_constant_base_node -----------------------
 // Constant table base node singleton.
 // shift is 0 in LP32 and 1 in LP64
 const int shift = (LogBytesPerWord - LogBytesPerInt);
 int words = _frame_slots >> shift;
 assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
 return words;
+}
+// To bang the stack of this compiled method we use the stack size
+// that the interpreter would need in case of a deoptimization. This
+// removes the need to bang the stack in the deoptimization blob which
+// in turn simplifies stack overflow handling. Returns a size in bytes.
+int Compile::bang_size_in_bytes() const {
+return MAX2(_interpreter_frame_size, frame_size_in_bytes()); // whichever is larger: recorded interpreter frame size or this method's own frame size
 }
 // ============================================================================
 //------------------------------CompileWrapper---------------------------------
 class CompileWrapper : public StackObj {
 _in_scratch_emit_size(false),
 _dead_node_list(comp_arena()),
 _dead_node_count(0),
 #ifndef PRODUCT
 _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
+_in_dump_cnt(0),
 _printer(IdealGraphPrinter::printer()),
 #endif
 _congraph(NULL),
+_replay_inline_data(NULL),
 _late_inlines(comp_arena(), 2, 0, NULL),
 _string_late_inlines(comp_arena(), 2, 0, NULL),
 _boxing_late_inlines(comp_arena(), 2, 0, NULL),
 _late_inlines_pos(0),
 _number_of_mh_late_inlines(0),
 _inlining_progress(false),
 _inlining_incrementally(false),
 _print_inlining_list(NULL),
 _print_inlining_idx(0),
-_preserve_jvm_state(0) {
+_preserve_jvm_state(0),
+_interpreter_frame_size(0) {
 C = this;
 CompileWrapper cw(this);
 #ifndef PRODUCT
 if (TimeCompiler2) {
 print_opto_assembly = true;
 }
 }
 set_print_assembly(print_opto_assembly);
 set_parsed_irreducible_loop(false);
+if (method()->has_option("ReplayInline")) {
+_replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
+}
 #endif
 set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
 set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));
+set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it
-if (ProfileTraps) {
+if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
 // Make sure the method being compiled gets its own MDO,
 // so we can at least track the decompile_count().
+// Need MDO to record RTM code generation state.
 method()->ensure_method_data();
 }
 Init(::AliasLevel);
 print_compile_messages();
-if (UseOldInlining || PrintCompilation NOT_PRODUCT( || PrintOpto) )
+_ilt = InlineTree::build_inline_tree_root();
-_ilt = InlineTree::build_inline_tree_root();
-else
-_ilt = NULL;
 // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
 assert(num_alias_types() >= AliasIdxRaw, "");
 #define MINIMUM_NODE_HASH  1023
 }
 }
 #endif
 NOT_PRODUCT( verify_barriers(); )
+// Dump compilation data to replay it.
+if (method()->has_option("DumpReplay")) {
+env()->dump_replay_data(_compile_id);
+}
+if (method()->has_option("DumpInline") && (ilt() != NULL)) {
+env()->dump_inline_data(_compile_id);
+}
 // Now that we know the size of all the monitors we can add a fixed slot
 // for the original deopt pc.
 _orig_pc_slot =  fixed_slots();
 int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
 set_fixed_slots(next_slot);
+// Compute when to use implicit null checks. Used by matching trap based
+// nodes and NullCheck optimization.
+set_allowed_deopt_reasons();
 // Now generate code
 Code_Gen();
 if (failing())  return;
 frame_size_in_words(), _oop_map_set,
 &_handler_table, &_inc_table,
 compiler,
 env()->comp_level(),
 has_unsafe_access(),
-SharedRuntime::is_wide_vector(max_vector_size())
+SharedRuntime::is_wide_vector(max_vector_size()),
+rtm_state()
 );
 if (log() != NULL) // Print code cache state into compiler log
 log()->code_cache_state();
 }
 _node_bundling_base(NULL),
 _java_calls(0),
 _inner_loops(0),
 #ifndef PRODUCT
 _trace_opto_output(TraceOptoOutput),
+_in_dump_cnt(0),
 _printer(NULL),
 #endif
 _dead_node_list(comp_arena()),
 _dead_node_count(0),
 _congraph(NULL),
+_replay_inline_data(NULL),
 _number_of_mh_late_inlines(0),
 _inlining_progress(false),
 _inlining_incrementally(false),
 _print_inlining_list(NULL),
 _print_inlining_idx(0),
-_preserve_jvm_state(0) {
+_preserve_jvm_state(0),
+_allowed_reasons(0),
+_interpreter_frame_size(0) {
 C = this;
 #ifndef PRODUCT
 TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
 TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
 set_print_assembly(PrintFrameConverterAssembly);
 set_parsed_irreducible_loop(false);
 #endif
+set_has_irreducible_loop(false); // no loops
 CompileWrapper cw(this);
 Init(/*AliasLevel=*/ 0);
 init_tf((*generator)());
 {
 set_max_inline_size(MaxInlineSize);
 set_freq_inline_size(FreqInlineSize);
 set_do_scheduling(OptoScheduling);
 set_do_count_invocations(false);
 set_do_method_data_update(false);
+set_rtm_state(NoRTM); // No RTM lock eliding by default
+#if INCLUDE_RTM_OPT
+if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {
+int rtm_state = method()->method_data()->rtm_state();
+if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {
+// Don't generate RTM lock eliding code.
+set_rtm_state(NoRTM);
+} else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {
+// Generate RTM lock eliding code without abort ratio calculation code.
+set_rtm_state(UseRTM);
+} else if (UseRTMDeopt) {
+// Generate RTM lock eliding code and include abort ratio calculation
+// code if UseRTMDeopt is on.
+set_rtm_state(ProfileRTM);
+}
+}
+#endif
 if (debug_info()->recording_non_safepoints()) {
 set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
 (comp_arena(), 8, 0, NULL));
 set_default_node_notes(Node_Notes::make(this));
 }
 for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) {
 Node* start = root()->fast_out(i);
 if( start->is_Start() )
 return start->as_Start();
 }
-ShouldNotReachHere();
+fatal("Did not find Start node!");
 return NULL;
 }
 //-------------------------------immutable_memory-------------------------------------
 // Access immutable memory
 NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
 PhasePeephole peep( _regalloc, cfg);
 peep.do_transform();
 }
+// Do late expand if CPU requires this.
+if (Matcher::require_postalloc_expand) {
+NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true));
+cfg.postalloc_expand(_regalloc);
+}
 // Convert Nodes to instruction bits in a buffer
 {
 // %%%% workspace merge brought two timers together for one job
 TracePhase t2a("output", &_t_output, true);
 NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
 tty->print("   ");
 tty->print(" %c ", starts_bundle);
 starts_bundle = ' ';
 tty->print("\t");
 delay->format(_regalloc, tty);
-tty->print_cr("");
+tty->cr();
 delay = NULL;
 }
 // Dump the exception table as well
 if( n->is_Catch() && (Verbose || WizardMode) ) {
 }
 if (pcs && n->_idx < pc_limit)
 tty->print_cr("%3.3x", pcs[n->_idx]);
 else
-tty->print_cr("");
+tty->cr();
 assert(cut_short || delay == NULL, "no unconditional delay branch");
 } // End of per-block dump
-tty->print_cr("");
+tty->cr();
 if (cut_short)  tty->print_cr("*** disassembly is cut short ***");
 }
 #endif
 case Op_CmpD3:
 frc.inc_double_count();
 break;
 case Op_Opaque1:              // Remove Opaque Nodes before matching
 case Op_Opaque2:              // Remove Opaque Nodes before matching
+case Op_Opaque3:
 n->subsume_by(n->in(1), this);
 break;
 case Op_CallStaticJava:
 case Op_CallJava:
 case Op_CallDynamicJava:
 // confuses register allocation.
 if (n->req() > MemBarNode::Precedent) {
 n->set_req(MemBarNode::Precedent, top());
 }
 break;
-// Must set a control edge on all nodes that produce a FlagsProj
-// so they can't escape the block that consumes the flags.
-// Must also set the non throwing branch as the control
-// for all nodes that depends on the result. Unless the node
-// already have a control that isn't the control of the
-// flag producer
-case Op_FlagsProj:
-{
-MathExactNode* math = (MathExactNode*)  n->in(0);
-Node* ctrl = math->control_node();
-Node* non_throwing = math->non_throwing_branch();
-math->set_req(0, ctrl);
-Node* result = math->result_node();
-if (result != NULL) {
-for (DUIterator_Fast jmax, j = result->fast_outs(jmax); j < jmax; j++) {
-Node* out = result->fast_out(j);
-// Phi nodes shouldn't be moved. They would only match below if they
-// had the same control as the MathExactNode. The only time that
-// would happen is if the Phi is also an input to the MathExact
-//
-// Cmp nodes shouldn't have control set at all.
-if (out->is_Phi() ||
-out->is_Cmp()) {
-continue;
-}
-if (out->in(0) == NULL) {
-out->set_req(0, non_throwing);
-} else if (out->in(0) == ctrl) {
-out->set_req(0, non_throwing);
-}
-}
-}
-}
-break;
 default:
 assert( !n->is_Call(), "" );
 assert( !n->is_Mem(), "" );
 break;
 }
 if (i < cnt) {
 // Place all non-visited non-null inputs onto stack
 Node* m = n->in(i);
 ++i;
 if (m != NULL && !frc._visited.test_set(m->_idx)) {
-if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL)
+if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) {
+// compute worst case interpreter size in case of a deoptimization
+update_interpreter_frame_size(m->as_SafePoint()->jvms()->interpreter_frame_size());
 sfpt.push(m);
+}
 cnt = m->req();
 nstack.push(n, i); // put on stack parent and next input's index
 n = m;
 i = 0;
 }
 if (md->is_empty()) {
 // Assume the trap has not occurred, or that it occurred only
 // because of a transient condition during start-up in the interpreter.
 return false;
 }
-if (md->has_trap_at(bci, reason) != 0) {
+ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
+if (md->has_trap_at(bci, m, reason) != 0) {
 // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
 // Also, if there are multiple reasons, or if there is no per-BCI record,
 // assume the worst.
 if (log())
 log()->elem("observe trap='%s' count='%d'",
 }
 // Less-accurate variant which does not require a method and bci.
 bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
 ciMethodData* logmd) {
-if (trap_count(reason) >= (uint)PerMethodTrapLimit) {
+if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) {
 // Too many traps globally.
 // Note that we use cumulative trap_count, not just md->trap_count.
 if (log()) {
 int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason);
 log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'",
 // Pick a cutoff point well within PerBytecodeRecompilationCutoff.
 uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8;
 uint m_cutoff  = (uint) PerMethodRecompilationCutoff / 2 + 1;  // not zero
 Deoptimization::DeoptReason per_bc_reason
 = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
+ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
 if ((per_bc_reason == Deoptimization::Reason_none
-|| md->has_trap_at(bci, reason) != 0)
+|| md->has_trap_at(bci, m, reason) != 0)
 // The trap frequency measure we care about is the recompile count:
-&& md->trap_recompiled_at(bci)
+&& md->trap_recompiled_at(bci, m)
 && md->overflow_recompile_count() >= bc_cutoff) {
 // Do not emit a trap here if it has already caused recompilations.
 // Also, if there are multiple reasons, or if there is no per-BCI record,
 // assume the worst.
 if (log())
 // The coast is clear.
 return false;
 }
 }
+// Compute when not to trap. Used by matching trap based nodes and
+// NullCheck optimization. Rebuilds _allowed_reasons as a bit mask with one bit per deopt reason.
+void Compile::set_allowed_deopt_reasons() {
+_allowed_reasons = 0; // start from an empty mask: no reason allowed
+if (is_method_compilation()) { // for anything else the mask stays 0
+for (int rs = (int)Deoptimization::Reason_none+1; rs < Compile::trapHistLength; rs++) { // every reason after Reason_none
+assert(rs < BitsPerInt, "recode bit map"); // each reason needs its own bit position below BitsPerInt
+if (!too_many_traps((Deoptimization::DeoptReason) rs)) {
+_allowed_reasons |= nth_bit(rs); // reason rs has not trapped too often: set its bit
+}
+}
+}
+}
 #ifndef PRODUCT
 //------------------------------verify_graph_edges---------------------------
 // Walk the Graph and verify that there is a one-to-one correspondence
 // between Use-Def edges and Def-Use edges in the graph.
 break;
 }
 default: ShouldNotReachHere();
 }
 assert(constant_addr, "consts section too small");
-assert((constant_addr - _masm.code()->consts()->start()) == con.offset(), err_msg_res("must be: %d == %d", constant_addr - _masm.code()->consts()->start(), con.offset()));
+assert((constant_addr - _masm.code()->consts()->start()) == con.offset(),
+err_msg_res("must be: %d == %d", (int) (constant_addr - _masm.code()->consts()->start()), (int)(con.offset())));
 }
 }
 int Compile::ConstantTable::find_offset(Constant& con) const {
 int idx = _constants.find(con);
 MacroAssembler _masm(&cb);
 address* jump_table_base = (address*) (_masm.code()->consts()->start() + offset);
 for (uint i = 0; i < n->outcnt(); i++) {
 address* constant_addr = &jump_table_base[i];
-assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, *constant_addr, (((address) n) + i)));
+assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, p2i(*constant_addr), p2i(((address) n) + i)));
 *constant_addr = cb.consts()->target(*labels.at(i), (address) constant_addr);
 cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
 }
 }
 if ( m == NULL ) continue;
 useful.push(m);
 }
 }
 for (int i = 0; i < _print_inlining_list->length(); i++) {
-tty->print(_print_inlining_list->adr_at(i)->ss()->as_string());
+tty->print("%s", _print_inlining_list->adr_at(i)->ss()->as_string());
 }
+}
+}
+// Dump inlining replay data to the stream.
+// Don't change thread state or acquire any locks.
+void Compile::dump_inline_data(outputStream* out) {
+InlineTree* inl_tree = ilt();
+if (inl_tree != NULL) { // without an inline tree nothing is written to 'out'
+out->print(" inline %d", inl_tree->count()); // header: the tree's count() value
+inl_tree->dump_replay_data(out); // the tree then writes its own replay records
 }
 }
 int Compile::cmp_expensive_nodes(Node* n1, Node* n2) {
 if (n1->Opcode() < n2->Opcode())      return -1;
 // Go over all type nodes that carry a speculative type, drop the
 // speculative part of the type and enqueue the node for an igvn
 // which may optimize it out.
 for (uint next = 0; next < worklist.size(); ++next) {
 Node *n  = worklist.at(next);
-if (n->is_Type() && n->as_Type()->type()->isa_oopptr() != NULL &&
+if (n->is_Type()) {
-n->as_Type()->type()->is_oopptr()->speculative() != NULL) {
 TypeNode* tn = n->as_Type();
-const TypeOopPtr* t = tn->type()->is_oopptr();
+const Type* t = tn->type();
-bool in_hash = igvn.hash_delete(n);
+const Type* t_no_spec = t->remove_speculative();
-assert(in_hash, "node should be in igvn hash table");
+if (t_no_spec != t) {
-tn->set_type(t->remove_speculative());
+bool in_hash = igvn.hash_delete(n);
-igvn.hash_insert(n);
+assert(in_hash, "node should be in igvn hash table");
-igvn._worklist.push(n); // give it a chance to go away
+tn->set_type(t_no_spec);
-modified++;
+igvn.hash_insert(n);
+igvn._worklist.push(n); // give it a chance to go away
+modified++;
+}
 }
 uint max = n->len();
 for( uint i = 0; i < max; ++i ) {
 Node *m = n->in(i);
 if (not_a_node(m))  continue;
 // Drop the speculative part of all types in the igvn's type table
 igvn.remove_speculative_types();
 if (modified > 0) {
 igvn.optimize();
 }
+#ifdef ASSERT
+// Verify that after the IGVN is over no speculative type has resurfaced
+worklist.clear();
+worklist.push(root());
+for (uint next = 0; next < worklist.size(); ++next) {
+Node *n  = worklist.at(next);
+const Type* t = igvn.type_or_null(n);
+assert((t == NULL) || (t == t->remove_speculative()), "no more speculative types");
+if (n->is_Type()) {
+t = n->as_Type()->type();
+assert(t == t->remove_speculative(), "no more speculative types");
+}
+uint max = n->len();
+for( uint i = 0; i < max; ++i ) {
+Node *m = n->in(i);
+if (not_a_node(m))  continue;
+worklist.push(m);
+}
+}
+igvn.check_no_speculative_types();
+#endif
 }
 }
 // Auxiliary method to support randomized stressing/fuzzing.
 //

Mercurial > hg > truffle

comparison src/share/vm/opto/compile.cpp @ 18041:52b4284cb496