Mercurial > hg > graal-compiler
diff src/share/vm/opto/compile.cpp @ 18041:52b4284cb496
Merge with jdk8u20-b26
author | Gilles Duboscq <duboscq@ssw.jku.at> |
---|---|
date | Wed, 15 Oct 2014 16:02:50 +0200 |
parents | 4ca6dc0799b6 0bf37f737702 |
children | 8b7a143aea6b |
line wrap: on
line diff
--- a/src/share/vm/opto/compile.cpp Thu Oct 16 10:21:29 2014 +0200 +++ b/src/share/vm/opto/compile.cpp Wed Oct 15 16:02:50 2014 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "ci/ciReplay.hpp" #include "classfile/systemDictionary.hpp" #include "code/exceptionHandlerTable.hpp" #include "code/nmethod.hpp" @@ -81,8 +82,11 @@ #ifdef TARGET_ARCH_MODEL_arm # include "adfiles/ad_arm.hpp" #endif -#ifdef TARGET_ARCH_MODEL_ppc -# include "adfiles/ad_ppc.hpp" +#ifdef TARGET_ARCH_MODEL_ppc_32 +# include "adfiles/ad_ppc_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_ppc_64 +# include "adfiles/ad_ppc_64.hpp" #endif @@ -435,6 +439,14 @@ return words; } +// To bang the stack of this compiled method we use the stack size +// that the interpreter would need in case of a deoptimization. This +// removes the need to bang the stack in the deoptimization blob which +// in turn simplifies stack overflow handling. +int Compile::bang_size_in_bytes() const { + return MAX2(_interpreter_frame_size, frame_size_in_bytes()); +} + // ============================================================================ //------------------------------CompileWrapper--------------------------------- class CompileWrapper : public StackObj { @@ -644,9 +656,11 @@ _dead_node_count(0), #ifndef PRODUCT _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")), + _in_dump_cnt(0), _printer(IdealGraphPrinter::printer()), #endif _congraph(NULL), + _replay_inline_data(NULL), _late_inlines(comp_arena(), 2, 0, NULL), _string_late_inlines(comp_arena(), 2, 0, NULL), _boxing_late_inlines(comp_arena(), 2, 0, NULL), @@ -656,7 +670,8 @@ _inlining_incrementally(false), _print_inlining_list(NULL), _print_inlining_idx(0), - _preserve_jvm_state(0) { + _preserve_jvm_state(0), + _interpreter_frame_size(0) { C = this; CompileWrapper cw(this); @@ -680,13 +695,19 @@ } set_print_assembly(print_opto_assembly); set_parsed_irreducible_loop(false); + + if (method()->has_option("ReplayInline")) { + _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level()); + } #endif set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining)); set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics")); - - if (ProfileTraps) { + set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it + + if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) { // Make sure the method being compiled gets its own MDO, // so we can at least track the decompile_count(). + // Need MDO to record RTM code generation state. method()->ensure_method_data(); } @@ -695,10 +716,7 @@ print_compile_messages(); - if (UseOldInlining || PrintCompilation NOT_PRODUCT( || PrintOpto) ) - _ilt = InlineTree::build_inline_tree_root(); - else - _ilt = NULL; + _ilt = InlineTree::build_inline_tree_root(); // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice assert(num_alias_types() >= AliasIdxRaw, ""); @@ -849,6 +867,15 @@ #endif NOT_PRODUCT( verify_barriers(); ) + + // Dump compilation data to replay it. + if (method()->has_option("DumpReplay")) { + env()->dump_replay_data(_compile_id); + } + if (method()->has_option("DumpInline") && (ilt() != NULL)) { + env()->dump_inline_data(_compile_id); + } + // Now that we know the size of all the monitors we can add a fixed slot // for the original deopt pc. @@ -856,6 +883,10 @@ int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size); set_fixed_slots(next_slot); + // Compute when to use implicit null checks. Used by matching trap based + // nodes and NullCheck optimization. + set_allowed_deopt_reasons(); + // Now generate code Code_Gen(); if (failing()) return; @@ -887,7 +918,8 @@ compiler, env()->comp_level(), has_unsafe_access(), - SharedRuntime::is_wide_vector(max_vector_size()) + SharedRuntime::is_wide_vector(max_vector_size()), + rtm_state() ); if (log() != NULL) // Print code cache state into compiler log @@ -933,17 +965,21 @@ _inner_loops(0), #ifndef PRODUCT _trace_opto_output(TraceOptoOutput), + _in_dump_cnt(0), _printer(NULL), #endif _dead_node_list(comp_arena()), _dead_node_count(0), _congraph(NULL), + _replay_inline_data(NULL), _number_of_mh_late_inlines(0), _inlining_progress(false), _inlining_incrementally(false), _print_inlining_list(NULL), _print_inlining_idx(0), - _preserve_jvm_state(0) { + _preserve_jvm_state(0), + _allowed_reasons(0), + _interpreter_frame_size(0) { C = this; #ifndef PRODUCT @@ -952,6 +988,8 @@ set_print_assembly(PrintFrameConverterAssembly); set_parsed_irreducible_loop(false); #endif + set_has_irreducible_loop(false); // no loops + CompileWrapper cw(this); Init(/*AliasLevel=*/ 0); init_tf((*generator)()); @@ -1050,7 +1088,23 @@ set_do_scheduling(OptoScheduling); set_do_count_invocations(false); set_do_method_data_update(false); - + set_rtm_state(NoRTM); // No RTM lock eliding by default +#if INCLUDE_RTM_OPT + if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) { + int rtm_state = method()->method_data()->rtm_state(); + if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) { + // Don't generate RTM lock eliding code. + set_rtm_state(NoRTM); + } else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) { + // Generate RTM lock eliding code without abort ratio calculation code. + set_rtm_state(UseRTM); + } else if (UseRTMDeopt) { + // Generate RTM lock eliding code and include abort ratio calculation + // code if UseRTMDeopt is on. + set_rtm_state(ProfileRTM); + } + } +#endif if (debug_info()->recording_non_safepoints()) { set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*> (comp_arena(), 8, 0, NULL)); @@ -1106,7 +1160,7 @@ if( start->is_Start() ) return start->as_Start(); } - ShouldNotReachHere(); + fatal("Did not find Start node!"); return NULL; } @@ -2248,6 +2302,12 @@ peep.do_transform(); } + // Do late expand if CPU requires this. + if (Matcher::require_postalloc_expand) { + NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true)); + cfg.postalloc_expand(_regalloc); + } + // Convert Nodes to instruction bits in a buffer { // %%%% workspace merge brought two timers together for one job @@ -2361,7 +2421,7 @@ starts_bundle = ' '; tty->print("\t"); delay->format(_regalloc, tty); - tty->print_cr(""); + tty->cr(); delay = NULL; } @@ -2375,12 +2435,12 @@ if (pcs && n->_idx < pc_limit) tty->print_cr("%3.3x", pcs[n->_idx]); else - tty->print_cr(""); + tty->cr(); assert(cut_short || delay == NULL, "no unconditional delay branch"); } // End of per-block dump - tty->print_cr(""); + tty->cr(); if (cut_short) tty->print_cr("*** disassembly is cut short ***"); } @@ -2552,6 +2612,7 @@ break; case Op_Opaque1: // Remove Opaque Nodes before matching case Op_Opaque2: // Remove Opaque Nodes before matching + case Op_Opaque3: n->subsume_by(n->in(1), this); break; case Op_CallStaticJava: @@ -2999,42 +3060,6 @@ n->set_req(MemBarNode::Precedent, top()); } break; - // Must set a control edge on all nodes that produce a FlagsProj - // so they can't escape the block that consumes the flags. - // Must also set the non throwing branch as the control - // for all nodes that depends on the result. Unless the node - // already have a control that isn't the control of the - // flag producer - case Op_FlagsProj: - { - MathExactNode* math = (MathExactNode*) n->in(0); - Node* ctrl = math->control_node(); - Node* non_throwing = math->non_throwing_branch(); - math->set_req(0, ctrl); - - Node* result = math->result_node(); - if (result != NULL) { - for (DUIterator_Fast jmax, j = result->fast_outs(jmax); j < jmax; j++) { - Node* out = result->fast_out(j); - // Phi nodes shouldn't be moved. They would only match below if they - // had the same control as the MathExactNode. The only time that - // would happen is if the Phi is also an input to the MathExact - // - // Cmp nodes shouldn't have control set at all. - if (out->is_Phi() || - out->is_Cmp()) { - continue; - } - - if (out->in(0) == NULL) { - out->set_req(0, non_throwing); - } else if (out->in(0) == ctrl) { - out->set_req(0, non_throwing); - } - } - } - } - break; default: assert( !n->is_Call(), "" ); assert( !n->is_Mem(), "" ); @@ -3063,8 +3088,12 @@ Node* m = n->in(i); ++i; if (m != NULL && !frc._visited.test_set(m->_idx)) { - if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) + if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) { + // compute worst case interpreter size in case of a deoptimization + update_interpreter_frame_size(m->as_SafePoint()->jvms()->interpreter_frame_size()); + sfpt.push(m); + } cnt = m->req(); nstack.push(n, i); // put on stack parent and next input's index n = m; @@ -3256,7 +3285,8 @@ // because of a transient condition during start-up in the interpreter. return false; } - if (md->has_trap_at(bci, reason) != 0) { + ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL; + if (md->has_trap_at(bci, m, reason) != 0) { // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic. // Also, if there are multiple reasons, or if there is no per-BCI record, // assume the worst. @@ -3274,7 +3304,7 @@ // Less-accurate variant which does not require a method and bci. bool Compile::too_many_traps(Deoptimization::DeoptReason reason, ciMethodData* logmd) { - if (trap_count(reason) >= (uint)PerMethodTrapLimit) { + if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) { // Too many traps globally. // Note that we use cumulative trap_count, not just md->trap_count. if (log()) { @@ -3309,10 +3339,11 @@ uint m_cutoff = (uint) PerMethodRecompilationCutoff / 2 + 1; // not zero Deoptimization::DeoptReason per_bc_reason = Deoptimization::reason_recorded_per_bytecode_if_any(reason); + ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL; if ((per_bc_reason == Deoptimization::Reason_none - || md->has_trap_at(bci, reason) != 0) + || md->has_trap_at(bci, m, reason) != 0) // The trap frequency measure we care about is the recompile count: - && md->trap_recompiled_at(bci) + && md->trap_recompiled_at(bci, m) && md->overflow_recompile_count() >= bc_cutoff) { // Do not emit a trap here if it has already caused recompilations. // Also, if there are multiple reasons, or if there is no per-BCI record, @@ -3339,6 +3370,19 @@ } } +// Compute when not to trap. Used by matching trap based nodes and +// NullCheck optimization. +void Compile::set_allowed_deopt_reasons() { + _allowed_reasons = 0; + if (is_method_compilation()) { + for (int rs = (int)Deoptimization::Reason_none+1; rs < Compile::trapHistLength; rs++) { + assert(rs < BitsPerInt, "recode bit map"); + if (!too_many_traps((Deoptimization::DeoptReason) rs)) { + _allowed_reasons |= nth_bit(rs); + } + } + } +} #ifndef PRODUCT //------------------------------verify_graph_edges--------------------------- @@ -3641,7 +3685,8 @@ default: ShouldNotReachHere(); } assert(constant_addr, "consts section too small"); - assert((constant_addr - _masm.code()->consts()->start()) == con.offset(), err_msg_res("must be: %d == %d", constant_addr - _masm.code()->consts()->start(), con.offset())); + assert((constant_addr - _masm.code()->consts()->start()) == con.offset(), + err_msg_res("must be: %d == %d", (int) (constant_addr - _masm.code()->consts()->start()), (int)(con.offset()))); } } @@ -3721,7 +3766,7 @@ for (uint i = 0; i < n->outcnt(); i++) { address* constant_addr = &jump_table_base[i]; - assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, *constant_addr, (((address) n) + i))); + assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, p2i(*constant_addr), p2i(((address) n) + i))); *constant_addr = cb.consts()->target(*labels.at(i), (address) constant_addr); cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type); } @@ -3752,11 +3797,21 @@ } } for (int i = 0; i < _print_inlining_list->length(); i++) { - tty->print(_print_inlining_list->adr_at(i)->ss()->as_string()); + tty->print("%s", _print_inlining_list->adr_at(i)->ss()->as_string()); } } } +// Dump inlining replay data to the stream. +// Don't change thread state and acquire any locks. +void Compile::dump_inline_data(outputStream* out) { + InlineTree* inl_tree = ilt(); + if (inl_tree != NULL) { + out->print(" inline %d", inl_tree->count()); + inl_tree->dump_replay_data(out); + } +} + int Compile::cmp_expensive_nodes(Node* n1, Node* n2) { if (n1->Opcode() < n2->Opcode()) return -1; else if (n1->Opcode() > n2->Opcode()) return 1; @@ -3893,16 +3948,18 @@ // which may optimize it out. for (uint next = 0; next < worklist.size(); ++next) { Node *n = worklist.at(next); - if (n->is_Type() && n->as_Type()->type()->isa_oopptr() != NULL && - n->as_Type()->type()->is_oopptr()->speculative() != NULL) { + if (n->is_Type()) { TypeNode* tn = n->as_Type(); - const TypeOopPtr* t = tn->type()->is_oopptr(); - bool in_hash = igvn.hash_delete(n); - assert(in_hash, "node should be in igvn hash table"); - tn->set_type(t->remove_speculative()); - igvn.hash_insert(n); - igvn._worklist.push(n); // give it a chance to go away - modified++; + const Type* t = tn->type(); + const Type* t_no_spec = t->remove_speculative(); + if (t_no_spec != t) { + bool in_hash = igvn.hash_delete(n); + assert(in_hash, "node should be in igvn hash table"); + tn->set_type(t_no_spec); + igvn.hash_insert(n); + igvn._worklist.push(n); // give it a chance to go away + modified++; + } } uint max = n->len(); for( uint i = 0; i < max; ++i ) { @@ -3916,6 +3973,27 @@ if (modified > 0) { igvn.optimize(); } +#ifdef ASSERT + // Verify that after the IGVN is over no speculative type has resurfaced + worklist.clear(); + worklist.push(root()); + for (uint next = 0; next < worklist.size(); ++next) { + Node *n = worklist.at(next); + const Type* t = igvn.type_or_null(n); + assert((t == NULL) || (t == t->remove_speculative()), "no more speculative types"); + if (n->is_Type()) { + t = n->as_Type()->type(); + assert(t == t->remove_speculative(), "no more speculative types"); + } + uint max = n->len(); + for( uint i = 0; i < max; ++i ) { + Node *m = n->in(i); + if (not_a_node(m)) continue; + worklist.push(m); + } + } + igvn.check_no_speculative_types(); +#endif } }