Mercurial > hg > graal-compiler

diff src/share/vm/opto/compile.cpp @ 18041:52b4284cb496
Merge with jdk8u20-b26
author: Gilles Duboscq <duboscq@ssw.jku.at>
date: Wed, 15 Oct 2014 16:02:50 +0200
parents: 4ca6dc0799b6 0bf37f737702
children: 8b7a143aea6b
--- a/src/share/vm/opto/compile.cpp	Thu Oct 16 10:21:29 2014 +0200
+++ b/src/share/vm/opto/compile.cpp	Wed Oct 15 16:02:50 2014 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "asm/macroAssembler.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "ci/ciReplay.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "code/exceptionHandlerTable.hpp"
 #include "code/nmethod.hpp"
@@ -81,8 +82,11 @@
 #ifdef TARGET_ARCH_MODEL_arm
 # include "adfiles/ad_arm.hpp"
 #endif
-#ifdef TARGET_ARCH_MODEL_ppc
-# include "adfiles/ad_ppc.hpp"
+#ifdef TARGET_ARCH_MODEL_ppc_32
+# include "adfiles/ad_ppc_32.hpp"
+#endif
+#ifdef TARGET_ARCH_MODEL_ppc_64
+# include "adfiles/ad_ppc_64.hpp"
 #endif
 
 
@@ -435,6 +439,14 @@
   return words;
 }
 
+// Returns the number of bytes of stack to bang for this compiled method: the
+// larger of the compiled frame size and the frame size the interpreter would
+// need in case of a deoptimization. This removes the need to bang the stack in
+// the deoptimization blob, which in turn simplifies stack overflow handling.
+int Compile::bang_size_in_bytes() const {
+  return MAX2(_interpreter_frame_size, frame_size_in_bytes());
+}
+
 // ============================================================================
 //------------------------------CompileWrapper---------------------------------
 class CompileWrapper : public StackObj {
@@ -644,9 +656,11 @@
                   _dead_node_count(0),
 #ifndef PRODUCT
                   _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
+                  _in_dump_cnt(0),
                   _printer(IdealGraphPrinter::printer()),
 #endif
                   _congraph(NULL),
+                  _replay_inline_data(NULL),
                   _late_inlines(comp_arena(), 2, 0, NULL),
                   _string_late_inlines(comp_arena(), 2, 0, NULL),
                   _boxing_late_inlines(comp_arena(), 2, 0, NULL),
@@ -656,7 +670,8 @@
                   _inlining_incrementally(false),
                   _print_inlining_list(NULL),
                   _print_inlining_idx(0),
-                  _preserve_jvm_state(0) {
+                  _preserve_jvm_state(0),
+                  _interpreter_frame_size(0) {
   C = this;
 
   CompileWrapper cw(this);
@@ -680,13 +695,19 @@
   }
   set_print_assembly(print_opto_assembly);
   set_parsed_irreducible_loop(false);
+
+  if (method()->has_option("ReplayInline")) {
+    _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
+  }
 #endif
   set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
   set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));
-
-  if (ProfileTraps) {
+  set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it
+
+  if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
     // Make sure the method being compiled gets its own MDO,
     // so we can at least track the decompile_count().
+    // Need MDO to record RTM code generation state.
     method()->ensure_method_data();
   }
 
@@ -695,10 +716,7 @@
 
   print_compile_messages();
 
-  if (UseOldInlining || PrintCompilation NOT_PRODUCT( || PrintOpto) )
-    _ilt = InlineTree::build_inline_tree_root();
-  else
-    _ilt = NULL;
+  _ilt = InlineTree::build_inline_tree_root();
 
   // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
   assert(num_alias_types() >= AliasIdxRaw, "");
@@ -849,6 +867,15 @@
 #endif
 
   NOT_PRODUCT( verify_barriers(); )
+
+  // Dump compilation data to replay it.
+  if (method()->has_option("DumpReplay")) {
+    env()->dump_replay_data(_compile_id);
+  }
+  if (method()->has_option("DumpInline") && (ilt() != NULL)) {
+    env()->dump_inline_data(_compile_id);
+  }
+
   // Now that we know the size of all the monitors we can add a fixed slot
   // for the original deopt pc.
 
@@ -856,6 +883,10 @@
   int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
   set_fixed_slots(next_slot);
 
+  // Compute when to use implicit null checks. Used by matching trap based
+  // nodes and NullCheck optimization.
+  set_allowed_deopt_reasons();
+
   // Now generate code
   Code_Gen();
   if (failing())  return;
@@ -887,7 +918,8 @@
                            compiler,
                            env()->comp_level(),
                            has_unsafe_access(),
-                           SharedRuntime::is_wide_vector(max_vector_size())
+                           SharedRuntime::is_wide_vector(max_vector_size()),
+                           rtm_state()
                            );
 
     if (log() != NULL) // Print code cache state into compiler log
@@ -933,17 +965,21 @@
     _inner_loops(0),
 #ifndef PRODUCT
     _trace_opto_output(TraceOptoOutput),
+    _in_dump_cnt(0),
     _printer(NULL),
 #endif
     _dead_node_list(comp_arena()),
     _dead_node_count(0),
     _congraph(NULL),
+    _replay_inline_data(NULL),
     _number_of_mh_late_inlines(0),
     _inlining_progress(false),
     _inlining_incrementally(false),
     _print_inlining_list(NULL),
     _print_inlining_idx(0),
-    _preserve_jvm_state(0) {
+    _preserve_jvm_state(0),
+    _allowed_reasons(0),
+    _interpreter_frame_size(0) {
   C = this;
 
 #ifndef PRODUCT
@@ -952,6 +988,8 @@
   set_print_assembly(PrintFrameConverterAssembly);
   set_parsed_irreducible_loop(false);
 #endif
+  set_has_irreducible_loop(false); // no loops
+
   CompileWrapper cw(this);
   Init(/*AliasLevel=*/ 0);
   init_tf((*generator)());
@@ -1050,7 +1088,23 @@
   set_do_scheduling(OptoScheduling);
   set_do_count_invocations(false);
   set_do_method_data_update(false);
-
+  set_rtm_state(NoRTM); // No RTM lock eliding by default
+#if INCLUDE_RTM_OPT
+  if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {
+    int rtm_state = method()->method_data()->rtm_state();
+    if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {
+      // Don't generate RTM lock eliding code.
+      set_rtm_state(NoRTM);
+    } else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {
+      // Generate RTM lock eliding code without abort ratio calculation code.
+      set_rtm_state(UseRTM);
+    } else if (UseRTMDeopt) {
+      // Generate RTM lock eliding code and include abort ratio calculation
+      // code if UseRTMDeopt is on.
+      set_rtm_state(ProfileRTM);
+    }
+  }
+#endif
   if (debug_info()->recording_non_safepoints()) {
     set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
                         (comp_arena(), 8, 0, NULL));
@@ -1106,7 +1160,7 @@
     if( start->is_Start() )
       return start->as_Start();
   }
-  ShouldNotReachHere();
+  fatal("Did not find Start node!");
   return NULL;
 }
 
@@ -2248,6 +2302,12 @@
     peep.do_transform();
   }
 
+  // Do late expand if CPU requires this.
+  if (Matcher::require_postalloc_expand) {
+    NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true));
+    cfg.postalloc_expand(_regalloc);
+  }
+
   // Convert Nodes to instruction bits in a buffer
   {
     // %%%% workspace merge brought two timers together for one job
@@ -2361,7 +2421,7 @@
         starts_bundle = ' ';
         tty->print("\t");
         delay->format(_regalloc, tty);
-        tty->print_cr("");
+        tty->cr();
         delay = NULL;
       }
 
@@ -2375,12 +2435,12 @@
     if (pcs && n->_idx < pc_limit)
       tty->print_cr("%3.3x", pcs[n->_idx]);
     else
-      tty->print_cr("");
+      tty->cr();
 
     assert(cut_short || delay == NULL, "no unconditional delay branch");
 
   } // End of per-block dump
-  tty->print_cr("");
+  tty->cr();
 
   if (cut_short)  tty->print_cr("*** disassembly is cut short ***");
 }
@@ -2552,6 +2612,7 @@
     break;
   case Op_Opaque1:              // Remove Opaque Nodes before matching
   case Op_Opaque2:              // Remove Opaque Nodes before matching
+  case Op_Opaque3:
     n->subsume_by(n->in(1), this);
     break;
   case Op_CallStaticJava:
@@ -2999,42 +3060,6 @@
       n->set_req(MemBarNode::Precedent, top());
     }
     break;
-    // Must set a control edge on all nodes that produce a FlagsProj
-    // so they can't escape the block that consumes the flags.
-    // Must also set the non throwing branch as the control
-    // for all nodes that depends on the result. Unless the node
-    // already have a control that isn't the control of the
-    // flag producer
-  case Op_FlagsProj:
-    {
-      MathExactNode* math = (MathExactNode*)  n->in(0);
-      Node* ctrl = math->control_node();
-      Node* non_throwing = math->non_throwing_branch();
-      math->set_req(0, ctrl);
-
-      Node* result = math->result_node();
-      if (result != NULL) {
-        for (DUIterator_Fast jmax, j = result->fast_outs(jmax); j < jmax; j++) {
-          Node* out = result->fast_out(j);
-          // Phi nodes shouldn't be moved. They would only match below if they
-          // had the same control as the MathExactNode. The only time that
-          // would happen is if the Phi is also an input to the MathExact
-          //
-          // Cmp nodes shouldn't have control set at all.
-          if (out->is_Phi() ||
-              out->is_Cmp()) {
-            continue;
-          }
-
-          if (out->in(0) == NULL) {
-            out->set_req(0, non_throwing);
-          } else if (out->in(0) == ctrl) {
-            out->set_req(0, non_throwing);
-          }
-        }
-      }
-    }
-    break;
   default:
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
@@ -3063,8 +3088,12 @@
       Node* m = n->in(i);
       ++i;
       if (m != NULL && !frc._visited.test_set(m->_idx)) {
-        if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL)
+        if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) {
+          // compute worst case interpreter size in case of a deoptimization
+          update_interpreter_frame_size(m->as_SafePoint()->jvms()->interpreter_frame_size());
+
           sfpt.push(m);
+        }
         cnt = m->req();
         nstack.push(n, i); // put on stack parent and next input's index
         n = m;
@@ -3256,7 +3285,8 @@
     // because of a transient condition during start-up in the interpreter.
     return false;
   }
-  if (md->has_trap_at(bci, reason) != 0) {
+  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
+  if (md->has_trap_at(bci, m, reason) != 0) {
     // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
     // Also, if there are multiple reasons, or if there is no per-BCI record,
     // assume the worst.
@@ -3274,7 +3304,7 @@
 // Less-accurate variant which does not require a method and bci.
 bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
                              ciMethodData* logmd) {
- if (trap_count(reason) >= (uint)PerMethodTrapLimit) {
+  if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) {
     // Too many traps globally.
     // Note that we use cumulative trap_count, not just md->trap_count.
     if (log()) {
@@ -3309,10 +3339,11 @@
   uint m_cutoff  = (uint) PerMethodRecompilationCutoff / 2 + 1;  // not zero
   Deoptimization::DeoptReason per_bc_reason
     = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
+  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
   if ((per_bc_reason == Deoptimization::Reason_none
-       || md->has_trap_at(bci, reason) != 0)
+       || md->has_trap_at(bci, m, reason) != 0)
       // The trap frequency measure we care about is the recompile count:
-      && md->trap_recompiled_at(bci)
+      && md->trap_recompiled_at(bci, m)
       && md->overflow_recompile_count() >= bc_cutoff) {
     // Do not emit a trap here if it has already caused recompilations.
     // Also, if there are multiple reasons, or if there is no per-BCI record,
@@ -3339,6 +3370,19 @@
   }
 }
 
+// Compute when not to trap: rebuild _allowed_reasons as a bit mask of the deopt reasons
+// too_many_traps() does not reject. Used by matching trap based nodes and NullCheck optimization.
+void Compile::set_allowed_deopt_reasons() {
+  _allowed_reasons = 0;  // start with nothing allowed; stays 0 unless this is a method compilation
+  if (is_method_compilation()) {
+    for (int rs = (int)Deoptimization::Reason_none+1; rs < Compile::trapHistLength; rs++) {
+      assert(rs < BitsPerInt, "recode bit map");  // every reason index must map to a distinct bit
+      if (!too_many_traps((Deoptimization::DeoptReason) rs)) {
+        _allowed_reasons |= nth_bit(rs);  // reason rs was not rejected by too_many_traps()
+      }
+    }
+  }
+}
 
 #ifndef PRODUCT
 //------------------------------verify_graph_edges---------------------------
@@ -3641,7 +3685,8 @@
     default: ShouldNotReachHere();
     }
     assert(constant_addr, "consts section too small");
-    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(), err_msg_res("must be: %d == %d", constant_addr - _masm.code()->consts()->start(), con.offset()));
+    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(),
+            err_msg_res("must be: %d == %d", (int) (constant_addr - _masm.code()->consts()->start()), (int)(con.offset())));
   }
 }
 
@@ -3721,7 +3766,7 @@
 
   for (uint i = 0; i < n->outcnt(); i++) {
     address* constant_addr = &jump_table_base[i];
-    assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, *constant_addr, (((address) n) + i)));
+    assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, p2i(*constant_addr), p2i(((address) n) + i)));
     *constant_addr = cb.consts()->target(*labels.at(i), (address) constant_addr);
     cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
   }
@@ -3752,11 +3797,21 @@
       }
     }
     for (int i = 0; i < _print_inlining_list->length(); i++) {
-      tty->print(_print_inlining_list->adr_at(i)->ss()->as_string());
+      tty->print("%s", _print_inlining_list->adr_at(i)->ss()->as_string());
     }
   }
 }
 
+// Dump inlining replay data to the stream 'out'.
+// Must not change thread state or acquire any locks.
+void Compile::dump_inline_data(outputStream* out) {
+  InlineTree* inl_tree = ilt();
+  if (inl_tree != NULL) {  // nothing is written when there is no inline tree
+    out->print(" inline %d", inl_tree->count());  // header: the tree's count() value
+    inl_tree->dump_replay_data(out);  // the tree then writes its own replay data
+  }
+}
+
 int Compile::cmp_expensive_nodes(Node* n1, Node* n2) {
   if (n1->Opcode() < n2->Opcode())      return -1;
   else if (n1->Opcode() > n2->Opcode()) return 1;
@@ -3893,16 +3948,18 @@
     // which may optimize it out.
     for (uint next = 0; next < worklist.size(); ++next) {
       Node *n  = worklist.at(next);
-      if (n->is_Type() && n->as_Type()->type()->isa_oopptr() != NULL &&
-          n->as_Type()->type()->is_oopptr()->speculative() != NULL) {
+      if (n->is_Type()) {
         TypeNode* tn = n->as_Type();
-        const TypeOopPtr* t = tn->type()->is_oopptr();
-        bool in_hash = igvn.hash_delete(n);
-        assert(in_hash, "node should be in igvn hash table");
-        tn->set_type(t->remove_speculative());
-        igvn.hash_insert(n);
-        igvn._worklist.push(n); // give it a chance to go away
-        modified++;
+        const Type* t = tn->type();
+        const Type* t_no_spec = t->remove_speculative();
+        if (t_no_spec != t) {
+          bool in_hash = igvn.hash_delete(n);
+          assert(in_hash, "node should be in igvn hash table");
+          tn->set_type(t_no_spec);
+          igvn.hash_insert(n);
+          igvn._worklist.push(n); // give it a chance to go away
+          modified++;
+        }
       }
       uint max = n->len();
       for( uint i = 0; i < max; ++i ) {
@@ -3916,6 +3973,27 @@
     if (modified > 0) {
       igvn.optimize();
     }
+#ifdef ASSERT
+    // Verify that after the IGVN is over no speculative type has resurfaced
+    worklist.clear();
+    worklist.push(root());
+    for (uint next = 0; next < worklist.size(); ++next) {
+      Node *n  = worklist.at(next);
+      const Type* t = igvn.type_or_null(n);
+      assert((t == NULL) || (t == t->remove_speculative()), "no more speculative types");
+      if (n->is_Type()) {
+        t = n->as_Type()->type();
+        assert(t == t->remove_speculative(), "no more speculative types");
+      }
+      uint max = n->len();
+      for( uint i = 0; i < max; ++i ) {
+        Node *m = n->in(i);
+        if (not_a_node(m))  continue;
+        worklist.push(m);
+      }
+    }
+    igvn.check_no_speculative_types();
+#endif
   }
 }
author	Gilles Duboscq <duboscq@ssw.jku.at>
date	Wed, 15 Oct 2014 16:02:50 +0200
parents	4ca6dc0799b6 0bf37f737702
children	8b7a143aea6b