changeset 4078:6c2a55d4902f hs23-b06

Merge
author jcoomes
date Fri, 18 Nov 2011 15:15:13 -0800
parents 883328bfc472 (current diff) b1754f3fbbd8 (diff)
children fde2a39ed7f3
files
diffstat 97 files changed, 1439 insertions(+), 596 deletions(-)
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Thu Nov 17 10:45:53 2011 -0800
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Fri Nov 18 15:15:13 2011 -0800
@@ -30,6 +30,7 @@
 import sun.jvm.hotspot.gc_implementation.parallelScavenge.*;
 import sun.jvm.hotspot.gc_implementation.shared.*;
 import sun.jvm.hotspot.memory.*;
+import sun.jvm.hotspot.oops.*;
 import sun.jvm.hotspot.runtime.*;
 
 public class HeapSummary extends Tool {
@@ -134,6 +135,9 @@
       } else {
          throw new RuntimeException("unknown CollectedHeap type : " + heap.getClass());
       }
+
+      System.out.println();
+      printInternStringStatistics();
    }
 
    // Helper methods
@@ -248,4 +252,41 @@
          return -1;
       }
    }
+
+   private void printInternStringStatistics() {
+      class StringStat implements StringTable.StringVisitor {
+         private int count;
+         private long size;
+         private OopField stringValueField;
+
+         StringStat() {
+            VM vm = VM.getVM();
+            SystemDictionary sysDict = vm.getSystemDictionary();
+            InstanceKlass strKlass = sysDict.getStringKlass();
+            // String has a field named 'value' of type 'char[]'.
+            stringValueField = (OopField) strKlass.findField("value", "[C");
+         }
+
+         private long stringSize(Instance instance) {
+            // We include String content in size calculation.
+            return instance.getObjectSize() +
+                   stringValueField.getValue(instance).getObjectSize();
+         }
+
+         public void visit(Instance str) {
+            count++;
+            size += stringSize(str);
+         }
+
+         public void print() {
+            System.out.println(count +
+                  " interned Strings occupying " + size + " bytes.");
+         }
+      }
+
+      StringStat stat = new StringStat();
+      StringTable strTable = VM.getVM().getStringTable();
+      strTable.stringsDo(stat);
+      stat.print();
+   }
 }
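
Note: the new statistics pass is a small internal-iterator/visitor pair. HeapSummary hands a StringTable.StringVisitor to stringsDo(), which calls visit() once per interned String, and the visitor sums each object header plus the char[] behind the 'value' field. A minimal standalone sketch of the same idiom (hypothetical C++ stand-ins, not the SA classes):

    #include <cstdio>
    #include <vector>

    // Hypothetical stand-ins for the SA types in the patch above.
    struct StringVisitor {
        virtual void visit(long instance_size) = 0;
        virtual ~StringVisitor() {}
    };

    struct StringTable {
        std::vector<long> sizes;                 // one entry per interned String
        void strings_do(StringVisitor* v) {      // internal iteration, as in stringsDo()
            for (long s : sizes) v->visit(s);
        }
    };

    struct StringStat : StringVisitor {
        int count = 0;
        long bytes = 0;
        void visit(long instance_size) { count++; bytes += instance_size; }
        void print() const {
            std::printf("%d interned Strings occupying %ld bytes.\n", count, bytes);
        }
    };

    int main() {
        StringTable table;
        table.sizes = { 24, 40, 32 };            // invented sizes: header + char[] payload
        StringStat stat;
        table.strings_do(&stat);
        stat.print();                            // prints: 3 interned Strings occupying 96 bytes.
    }
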
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/PermStat.java	Thu Nov 17 10:45:53 2011 -0800
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/PermStat.java	Fri Nov 18 15:15:13 2011 -0800
@@ -63,47 +63,9 @@
    }
 
    public void run() {
-      printInternStringStatistics();
       printClassLoaderStatistics();
    }
 
-   private void printInternStringStatistics() {
-      class StringStat implements StringTable.StringVisitor {
-         private int count;
-         private long size;
-         private OopField stringValueField;
-
-         StringStat() {
-            VM vm = VM.getVM();
-            SystemDictionary sysDict = vm.getSystemDictionary();
-            InstanceKlass strKlass = sysDict.getStringKlass();
-            // String has a field named 'value' of type 'char[]'.
-            stringValueField = (OopField) strKlass.findField("value", "[C");
-         }
-
-         private long stringSize(Instance instance) {
-            // We include String content in size calculation.
-            return instance.getObjectSize() +
-                   stringValueField.getValue(instance).getObjectSize();
-         }
-
-         public void visit(Instance str) {
-            count++;
-            size += stringSize(str);
-         }
-
-         public void print() {
-            System.out.println(count +
-                  " intern Strings occupying " + size + " bytes.");
-         }
-      }
-
-      StringStat stat = new StringStat();
-      StringTable strTable = VM.getVM().getStringTable();
-      strTable.stringsDo(stat);
-      stat.print();
-   }
-
    private void printClassLoaderStatistics() {
       final PrintStream out = System.out;
       final PrintStream err = System.err;
--- a/make/hotspot_version	Thu Nov 17 10:45:53 2011 -0800
+++ b/make/hotspot_version	Fri Nov 18 15:15:13 2011 -0800
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=23
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=05
+HS_BUILD_NUMBER=06
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/jprt.properties	Thu Nov 17 10:45:53 2011 -0800
+++ b/make/jprt.properties	Fri Nov 18 15:15:13 2011 -0800
@@ -541,9 +541,20 @@
   ${jprt.my.windows.i586}-*-c2-servertest, \
   ${jprt.my.windows.x64}-*-c2-servertest
 
+jprt.make.rule.test.targets.standard.internalvmtests = \
+  ${jprt.my.solaris.sparc}-fastdebug-c2-internalvmtests, \
+  ${jprt.my.solaris.sparcv9}-fastdebug-c2-internalvmtests, \
+  ${jprt.my.solaris.i586}-fastdebug-c2-internalvmtests, \
+  ${jprt.my.solaris.x64}-fastdebug-c2-internalvmtests, \
+  ${jprt.my.linux.i586}-fastdebug-c2-internalvmtests, \
+  ${jprt.my.linux.x64}-fastdebug-c2-internalvmtests, \
+  ${jprt.my.windows.i586}-fastdebug-c2-internalvmtests, \
+  ${jprt.my.windows.x64}-fastdebug-c2-internalvmtests
+  
 jprt.make.rule.test.targets.standard = \
   ${jprt.make.rule.test.targets.standard.client}, \
-  ${jprt.make.rule.test.targets.standard.server}
+  ${jprt.make.rule.test.targets.standard.server}, \
+  ${jprt.make.rule.test.targets.standard.internalvmtests}
 
 jprt.make.rule.test.targets.embedded = \
   ${jprt.make.rule.test.targets.standard.client}
--- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -597,6 +597,10 @@
 inline void MacroAssembler::jmp( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, G0, rspec); }
 
 inline bool MacroAssembler::is_far_target(address d) {
+  if (ForceUnreachable) {
+    // References outside the code cache should be treated as far
+    return d < CodeCache::low_bound() || d > CodeCache::high_bound();
+  }
   return !is_in_wdisp30_range(d, CodeCache::low_bound()) || !is_in_wdisp30_range(d, CodeCache::high_bound());
 }
 
@@ -679,28 +683,44 @@
 
 inline void MacroAssembler::load_contents(const AddressLiteral& addrlit, Register d, int offset) {
   assert_not_delayed();
-  sethi(addrlit, d);
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, d);
+  } else {
+    sethi(addrlit, d);
+  }
   ld(d, addrlit.low10() + offset, d);
 }
 
 
 inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) {
   assert_not_delayed();
-  sethi(addrlit, d);
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, d);
+  } else {
+    sethi(addrlit, d);
+  }
   ld_ptr(d, addrlit.low10() + offset, d);
 }
 
 
 inline void MacroAssembler::store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) {
   assert_not_delayed();
-  sethi(addrlit, temp);
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, temp);
+  } else {
+    sethi(addrlit, temp);
+  }
   st(s, temp, addrlit.low10() + offset);
 }
 
 
 inline void MacroAssembler::store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) {
   assert_not_delayed();
-  sethi(addrlit, temp);
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, temp);
+  } else {
+    sethi(addrlit, temp);
+  }
   st_ptr(s, temp, addrlit.low10() + offset);
 }
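
Note: ForceUnreachable makes the assembler pessimistic: any address outside the code cache is treated as far, and the load/store helpers switch from sethi to patchable_sethi so the high bits can be repatched later. The underlying range test concerns SPARC's call displacement, a signed 30-bit word offset (roughly +/-2 GB in bytes). A hedged, simplified sketch of the two checks (not the real MacroAssembler code):

    #include <cstdint>

    // Does the byte displacement between 'from' and 'to' fit the signed
    // 30-bit word displacement (wdisp30 field) of a SPARC call?
    static bool is_in_wdisp30_range(int64_t from, int64_t to) {
        int64_t word_disp = (to - from) / 4;        // displacements count 4-byte words
        const int64_t lo = -((int64_t)1 << 29);
        const int64_t hi =  ((int64_t)1 << 29);
        return lo <= word_disp && word_disp < hi;
    }

    // Mirror of the new is_far_target() logic above.
    static bool is_far_target(int64_t d, int64_t cache_lo, int64_t cache_hi,
                              bool force_unreachable) {
        if (force_unreachable) {
            return d < cache_lo || d > cache_hi;    // outside the code cache => far
        }
        return !is_in_wdisp30_range(d, cache_lo) || !is_in_wdisp30_range(d, cache_hi);
    }

    int main() {
        // An address 3 GB away from a 64 MB code cache at 0 is unreachable by one call.
        return is_far_target(3LL * 1024 * 1024 * 1024, 0, 64 * 1024 * 1024, false) ? 0 : 1;
    }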
 
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -367,10 +367,10 @@
 
 void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
-  __ call(SharedRuntime::deopt_blob()->unpack_with_reexecution());
+  __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type);
   __ delayed()->nop();
   ce->add_call_info_here(_info);
-  debug_only(__ should_not_reach_here());
+  DEBUG_ONLY(__ should_not_reach_here());
 }
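
Note: the debug_only -> DEBUG_ONLY change is cosmetic; both macros expand their argument only in debug builds, and the uppercase form is the canonical spelling. A simplified sketch of the macro (assumption: behavior as in utilities/macros.hpp):

    #include <cstdio>

    #ifdef ASSERT                       // defined for debug/fastdebug builds
      #define DEBUG_ONLY(code) code     // keep the statement
    #else
      #define DEBUG_ONLY(code)          // compile it away in product builds
    #endif

    int main() {
        DEBUG_ONLY(std::printf("only printed when built with -DASSERT\n");)
        return 0;
    }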
 
 
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1116,7 +1116,7 @@
       } else {
         __ set(value_hi, O7);
       }
-      offset = store(tmp, base, addr->disp() + hi_word_offset_in_bytes, T_INT, wide, false);
+      store(tmp, base, addr->disp() + hi_word_offset_in_bytes, T_INT, wide, false);
       break;
     }
     case T_OBJECT: {
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -766,7 +766,22 @@
 
         __ ret();
         __ delayed()->restore();
+      }
+      break;
 
+    case deoptimize_id:
+      {
+        __ set_info("deoptimize", dont_gc_arguments);
+        OopMap* oop_map = save_live_registers(sasm);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize));
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+        restore_live_registers(sasm);
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+        AddressLiteral dest(deopt_blob->unpack_with_reexecution());
+        __ jump_to(dest, O0);
+        __ delayed()->restore();
       }
       break;
 
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -352,6 +352,7 @@
   BLOCK_COMMENT("load_stack_move {");
   __ ldsw(G3_amh_conversion, stack_move_reg);
   __ sra(stack_move_reg, CONV_STACK_MOVE_SHIFT, stack_move_reg);
+#ifdef ASSERT
   if (VerifyMethodHandles) {
     Label L_ok, L_bad;
     int32_t stack_move_limit = 0x0800;  // extra-large
@@ -363,6 +364,7 @@
     __ stop("load_stack_move of garbage value");
     __ BIND(L_ok);
   }
+#endif
   BLOCK_COMMENT("} load_stack_move");
 }
 
--- a/src/cpu/sparc/vm/methodHandles_sparc.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/sparc/vm/methodHandles_sparc.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -27,7 +27,7 @@
 
 // Adapters
 enum /* platform_dependent_constants */ {
-  adapter_code_size = NOT_LP64(22000 DEBUG_ONLY(+ 40000)) LP64_ONLY(32000 DEBUG_ONLY(+ 80000))
+  adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000))
 };
 
 public:
--- a/src/cpu/sparc/vm/sparc.ad	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Nov 18 15:15:13 2011 -0800
@@ -1860,6 +1860,14 @@
 // Threshold size for cleararray.
 const int Matcher::init_array_short_size = 8 * BytesPerLong;
 
+// No additional cost for CMOVL.
+const int Matcher::long_cmove_cost() { return 0; }
+
+// CMOVF/CMOVD are expensive on T4 and on SPARC64.
+const int Matcher::float_cmove_cost() {
+  return (VM_Version::is_T4() || VM_Version::is_sparc64()) ? ConditionalMoveLimit : 0;
+}
+
 // Should the Matcher clone shifts on addressing modes, expecting them to
 // be subsumed into complex addressing expressions or compute them into
 // registers?  True for Intel but false for most RISCs
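
Note: C2's matcher consults these costs when deciding whether to convert a branch into a conditional move, so returning ConditionalMoveLimit from float_cmove_cost() effectively vetoes float/double cmoves on T4 and SPARC64. A hedged sketch of how such a cost gate is consulted (hypothetical driver, not C2's real loop-optimizer code):

    #include <cstdio>

    const int ConditionalMoveLimit = 3;             // illustrative value

    static int float_cmove_cost(bool is_T4_or_sparc64) {
        return is_T4_or_sparc64 ? ConditionalMoveLimit : 0;
    }

    static bool convert_branch_to_cmove(int flow_cost, bool is_T4_or_sparc64) {
        // Give up once the combined cost reaches the limit.
        return flow_cost + float_cmove_cost(is_T4_or_sparc64) < ConditionalMoveLimit;
    }

    int main() {
        std::printf("%d\n", convert_branch_to_cmove(1, false));  // 1: cheap, convert
        std::printf("%d\n", convert_branch_to_cmove(1, true));   // 0: vetoed
        return 0;
    }
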
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -211,7 +211,7 @@
 #ifdef COMPILER2
   // T4 and newer Sparc cpus have fast RDPC.
   if (has_fast_rdpc() && FLAG_IS_DEFAULT(UseRDPCForConstantTableBase)) {
-//    FLAG_SET_DEFAULT(UseRDPCForConstantTableBase, true);
+    FLAG_SET_DEFAULT(UseRDPCForConstantTableBase, true);
   }
 
   // Currently not supported anywhere.
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -387,9 +387,9 @@
 
 void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
-  __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack_with_reexecution()));
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
   ce->add_call_info_here(_info);
-  debug_only(__ should_not_reach_here());
+  DEBUG_ONLY(__ should_not_reach_here());
 }
 
 
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1447,7 +1447,22 @@
         oop_maps = new OopMapSet();
         oop_maps->add_gc_map(call_offset, map);
         restore_live_registers(sasm, save_fpu_registers);
+      }
+      break;
 
+    case deoptimize_id:
+      {
+        StubFrame f(sasm, "deoptimize", dont_gc_arguments);
+        const int num_rt_args = 1;  // thread
+        OopMap* oop_map = save_live_registers(sasm, num_rt_args);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize));
+        oop_maps = new OopMapSet();
+        oop_maps->add_gc_map(call_offset, oop_map);
+        restore_live_registers(sasm);
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+        __ leave();
+        __ jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
       }
       break;
 
--- a/src/cpu/x86/vm/frame_x86.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/frame_x86.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -234,10 +234,12 @@
 void frame::patch_pc(Thread* thread, address pc) {
   address* pc_addr = &(((address*) sp())[-1]);
   if (TracePcPatching) {
-    tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "] ",
+    tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
                   pc_addr, *pc_addr, pc);
   }
-  assert(_pc == *pc_addr, err_msg("must be: " INTPTR_FORMAT " == " INTPTR_FORMAT, _pc, *pc_addr));
+  // Either the return address is the original one or we are going to
+  // patch in the same address that's already there.
+  assert(_pc == *pc_addr || pc == *pc_addr, "must be");
   *pc_addr = pc;
   _cb = CodeCache::find_blob(pc);
   address original_pc = nmethod::get_deopt_original_pc(this);
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -382,6 +382,7 @@
     __ movslq(rdi_stack_move, rdi_stack_move);
   }
 #endif //_LP64
+#ifdef ASSERT
   if (VerifyMethodHandles) {
     Label L_ok, L_bad;
     int32_t stack_move_limit = 0x4000;  // extra-large
@@ -393,6 +394,7 @@
     __ stop("load_stack_move of garbage value");
     __ BIND(L_ok);
   }
+#endif
   BLOCK_COMMENT("} load_stack_move");
 }
 
--- a/src/cpu/x86/vm/methodHandles_x86.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/methodHandles_x86.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -27,7 +27,7 @@
 
 // Adapters
 enum /* platform_dependent_constants */ {
-  adapter_code_size = NOT_LP64(30000 DEBUG_ONLY(+ 10000)) LP64_ONLY(80000 DEBUG_ONLY(+ 120000))
+  adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 15000)) LP64_ONLY(32000 DEBUG_ONLY(+ 80000))
 };
 
 public:
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -2797,17 +2797,25 @@
   // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
 
   // Use rbp because the frames look interpreted now
-  __ set_last_Java_frame(noreg, rbp, NULL);
-
+  // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
+  // Don't need the precise return PC here, just precise enough to point into this code blob.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(noreg, rbp, the_pc);
+
+  __ andptr(rsp, -(StackAlignmentInBytes));  // Fix stack alignment as required by ABI
   __ mov(c_rarg0, r15_thread);
   __ movl(c_rarg1, r14); // second arg: exec_mode
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
+  // Revert SP alignment after call since we're going to do some SP relative addressing below
+  __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset()));
 
   // Set an oopmap for the call site
-  oop_maps->add_gc_map(__ pc() - start,
+  // Use the same PC we used for the last java frame
+  oop_maps->add_gc_map(the_pc - start,
                        new OopMap( frame_size_in_words, 0 ));
 
-  __ reset_last_Java_frame(true, false);
+  // Clear fp AND pc
+  __ reset_last_Java_frame(true, true);
 
   // Collect return values
   __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
@@ -2968,7 +2976,10 @@
                               // Prolog
 
   // Use rbp because the frames look interpreted now
-  __ set_last_Java_frame(noreg, rbp, NULL);
+  // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
+  // Don't need the precise return PC here, just precise enough to point into this code blob.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(noreg, rbp, the_pc);
 
   // Call C code.  Need thread but NOT official VM entry
   // crud.  We cannot block on this call, no GC can happen.  Call should
@@ -2977,14 +2988,17 @@
   //
   // BasicType unpack_frames(JavaThread* thread, int exec_mode);
 
+  __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI
   __ mov(c_rarg0, r15_thread);
   __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap);
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
 
   // Set an oopmap for the call site
-  oop_maps->add_gc_map(__ pc() - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
-
-  __ reset_last_Java_frame(true, false);
+  // Use the same PC we used for the last java frame
+  oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
+
+  // Clear fp AND pc
+  __ reset_last_Java_frame(true, true);
 
   // Pop self-frame.
   __ leave();                 // Epilog
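
Note: the x86-64 SysV ABI requires RSP to be 16-byte aligned at a call, and andptr(rsp, -(StackAlignmentInBytes)) rounds the stack pointer down to that boundary before calling into the VM; saving "the_pc" keeps the frame walkable even though last_Java_sp can no longer be derived from the re-aligned RSP. The masking trick in isolation:

    #include <cassert>
    #include <cstdint>

    // align_down(sp, 16) == "and sp, -16": -16 is ...11110000 in two's
    // complement, so the AND clears the low four bits.
    static uint64_t align_down(uint64_t sp, uint64_t alignment /* power of two */) {
        return sp & ~(alignment - 1);
    }

    int main() {
        assert(align_down(0x7fff0038, 16) == 0x7fff0030);
        assert(align_down(0x7fff0030, 16) == 0x7fff0030);   // already aligned
        return 0;
    }
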
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1609,6 +1609,12 @@
     // and sender_sp is fp+8
     intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1;
 
+#ifdef ASSERT
+    if (caller->is_interpreted_frame()) {
+      assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement");
+    }
+#endif
+
     interpreter_frame->interpreter_frame_set_locals(locals);
     BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
     BasicObjectLock* monbot = montop - moncount;
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1622,6 +1622,12 @@
     // sender_sp is fp+16 XXX
     intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1;
 
+#ifdef ASSERT
+    if (caller->is_interpreted_frame()) {
+      assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement");
+    }
+#endif
+
     interpreter_frame->interpreter_frame_set_locals(locals);
     BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
     BasicObjectLock* monbot = montop - moncount;
--- a/src/cpu/x86/vm/x86_32.ad	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Nov 18 15:15:13 2011 -0800
@@ -1393,6 +1393,12 @@
 // Threshold size for cleararray.
 const int Matcher::init_array_short_size = 8 * BytesPerLong;
 
+// Needs 2 CMOV's for longs.
+const int Matcher::long_cmove_cost() { return 1; }
+
+// No CMOVF/CMOVD with SSE/SSE2
+const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
+
 // Should the Matcher clone shifts on addressing modes, expecting them to
 // be subsumed into complex addressing expressions or compute them into
 // registers?  True for Intel but false for most RISCs
@@ -7905,6 +7911,40 @@
 
 //----------Conditional Move---------------------------------------------------
 // Conditional move
+instruct jmovI_reg(cmpOp cop, eFlagsReg cr, eRegI dst, eRegI src) %{
+  predicate(!VM_Version::supports_cmov() );
+  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "J$cop,us skip\t# signed cmove\n\t"
+            "MOV    $dst,$src\n"
+      "skip:" %}
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movl($dst$$Register, $src$$Register);
+    __ bind(Lskip);
+  %}
+  ins_pipe( pipe_cmov_reg );
+%}
+
+instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src) %{
+  predicate(!VM_Version::supports_cmov() );
+  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
+  ins_cost(200);
+  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
+            "MOV    $dst,$src\n"
+      "skip:" %}
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movl($dst$$Register, $src$$Register);
+    __ bind(Lskip);
+  %}
+  ins_pipe( pipe_cmov_reg );
+%}
+
 instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
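
Note: the new jmov instructs emulate a conditional move on CPUs without CMOV by branching over a plain MOV. The inversion via jccb((Assembler::Condition)($cop$$cmpcode^1), ...) works because x86 encodes each condition and its negation as a pair differing only in the lowest bit:

    #include <cassert>

    // x86 condition codes come in complementary pairs, e.g. 0x4 = equal,
    // 0x5 = not-equal, 0xC = less, 0xD = greater-or-equal, so XOR-ing
    // with 1 negates the condition.
    enum Condition { equal = 0x4, notEqual = 0x5, less = 0xC, greaterEqual = 0xD };

    static Condition negate(Condition cc) { return Condition(cc ^ 1); }

    int main() {
        assert(negate(equal) == notEqual);
        assert(negate(less)  == greaterEqual);
        return 0;
    }
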
--- a/src/cpu/x86/vm/x86_64.ad	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Nov 18 15:15:13 2011 -0800
@@ -1993,6 +1993,12 @@
 // Threshold size for cleararray.
 const int Matcher::init_array_short_size = 8 * BytesPerLong;
 
+// No additional cost for CMOVL.
+const int Matcher::long_cmove_cost() { return 0; }
+
+// No CMOVF/CMOVD with SSE2
+const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
+
 // Should the Matcher clone shifts on addressing modes, expecting them
 // to be subsumed into complex addressing expressions or compute them
 // into registers?  True for Intel but false for most RISCs
--- a/src/share/vm/asm/codeBuffer.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/asm/codeBuffer.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -26,6 +26,7 @@
 #include "asm/codeBuffer.hpp"
 #include "compiler/disassembler.hpp"
 #include "utilities/copy.hpp"
+#include "utilities/xmlstream.hpp"
 
 // The structure of a CodeSection:
 //
@@ -81,7 +82,7 @@
 CodeBuffer::CodeBuffer(CodeBlob* blob) {
   initialize_misc("static buffer");
   initialize(blob->content_begin(), blob->content_size());
-  assert(verify_section_allocation(), "initial use of buffer OK");
+  verify_section_allocation();
 }
 
 void CodeBuffer::initialize(csize_t code_size, csize_t locs_size) {
@@ -108,17 +109,18 @@
     _insts.initialize_locs(locs_size / sizeof(relocInfo));
   }
 
-  assert(verify_section_allocation(), "initial use of blob is OK");
+  verify_section_allocation();
 }
 
 
 CodeBuffer::~CodeBuffer() {
+  verify_section_allocation();
+
   // If we allocate our code buffer from the CodeCache
   // via a BufferBlob, and it's not permanent, then
   // free the BufferBlob.
   // The rest of the memory will be freed when the ResourceObj
   // is released.
-  assert(verify_section_allocation(), "final storage configuration still OK");
   for (CodeBuffer* cb = this; cb != NULL; cb = cb->before_expand()) {
     // Previous incarnations of this buffer are held live, so that internal
     // addresses constructed before expansions will not be confused.
@@ -484,7 +486,7 @@
 
   // Done calculating sections; did it come out to the right end?
   assert(buf_offset == total_content_size(), "sanity");
-  assert(dest->verify_section_allocation(), "final configuration works");
+  dest->verify_section_allocation();
 }
 
 csize_t CodeBuffer::total_offset_of(CodeSection* cs) const {
@@ -632,7 +634,8 @@
 // CodeBuffer gets the final layout (consts, insts, stubs in order of
 // ascending address).
 void CodeBuffer::relocate_code_to(CodeBuffer* dest) const {
-  DEBUG_ONLY(address dest_end = dest->_total_start + dest->_total_size);
+  address dest_end = dest->_total_start + dest->_total_size;
+  address dest_filled = NULL;
   for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) {
     // pull code out of each section
     const CodeSection* cs = code_section(n);
@@ -654,6 +657,8 @@
       Copy::fill_to_bytes(dest_cs->end(), dest_cs->remaining(),
                           Assembler::code_fill_byte());
     }
+    // Keep track of the highest filled address
+    dest_filled = MAX2(dest_filled, dest_cs->end() + dest_cs->remaining());
 
     assert(cs->locs_start() != (relocInfo*)badAddress,
            "this section carries no reloc storage, but reloc was attempted");
@@ -668,6 +673,14 @@
       }
     }
   }
+
+  if (dest->blob() == NULL) {
+    // Destination is a final resting place, not just another buffer.
+    // Normalize uninitialized bytes in the final padding.
+    Copy::fill_to_bytes(dest_filled, dest_end - dest_filled,
+                        Assembler::code_fill_byte());
+
+  }
 }
 
 csize_t CodeBuffer::figure_expanded_capacities(CodeSection* which_cs,
@@ -799,7 +812,7 @@
   _decode_begin = NULL;  // sanity
 
   // Make certain that the new sections are all snugly inside the new blob.
-  assert(verify_section_allocation(), "expanded allocation is ship-shape");
+  verify_section_allocation();
 
 #ifndef PRODUCT
   if (PrintNMethods && (WizardMode || Verbose)) {
@@ -828,35 +841,48 @@
   DEBUG_ONLY(cb->_blob = (BufferBlob*)badAddress);
 }
 
-#ifdef ASSERT
-bool CodeBuffer::verify_section_allocation() {
+void CodeBuffer::verify_section_allocation() {
   address tstart = _total_start;
-  if (tstart == badAddress)  return true;  // smashed by set_blob(NULL)
+  if (tstart == badAddress)  return;  // smashed by set_blob(NULL)
   address tend   = tstart + _total_size;
   if (_blob != NULL) {
-    assert(tstart >= _blob->content_begin(), "sanity");
-    assert(tend   <= _blob->content_end(),   "sanity");
+
+    guarantee(tstart >= _blob->content_begin(), "sanity");
+    guarantee(tend   <= _blob->content_end(),   "sanity");
   }
   // Verify disjointness.
   for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) {
     CodeSection* sect = code_section(n);
     if (!sect->is_allocated() || sect->is_empty())  continue;
-    assert((intptr_t)sect->start() % sect->alignment() == 0
+    guarantee((intptr_t)sect->start() % sect->alignment() == 0
            || sect->is_empty() || _blob == NULL,
            "start is aligned");
     for (int m = (int) SECT_FIRST; m < (int) SECT_LIMIT; m++) {
       CodeSection* other = code_section(m);
       if (!other->is_allocated() || other == sect)  continue;
-      assert(!other->contains(sect->start()    ), "sanity");
+      guarantee(!other->contains(sect->start()    ), "sanity");
       // limit is an exclusive address and can be the start of another
       // section.
-      assert(!other->contains(sect->limit() - 1), "sanity");
+      guarantee(!other->contains(sect->limit() - 1), "sanity");
     }
-    assert(sect->end() <= tend, "sanity");
+    guarantee(sect->end() <= tend, "sanity");
+    guarantee(sect->end() <= sect->limit(), "sanity");
   }
-  return true;
 }
-#endif //ASSERT
+
+void CodeBuffer::log_section_sizes(const char* name) {
+  if (xtty != NULL) {
+    // log info about buffer usage
+    xtty->print_cr("<blob name='%s' size='%d'>", name, _total_size);
+    for (int n = (int) CodeBuffer::SECT_FIRST; n < (int) CodeBuffer::SECT_LIMIT; n++) {
+      CodeSection* sect = code_section(n);
+      if (!sect->is_allocated() || sect->is_empty())  continue;
+      xtty->print_cr("<sect index='%d' size='" SIZE_FORMAT "' free='" SIZE_FORMAT "'/>",
+                     n, sect->limit() - sect->start(), sect->limit() - sect->end());
+    }
+    xtty->print_cr("</blob>");
+  }
+}
 
 #ifndef PRODUCT
 
@@ -884,7 +910,6 @@
   _comments.add_comment(offset, comment);
 }
 
-
 class CodeComment: public CHeapObj {
  private:
   friend class CodeComments;
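
Note: verify_section_allocation() used to be a debug-only bool wrapped in assert(...); it is now an unconditional void using guarantee(), so the section checks also fire in product builds (including the new call from the destructor). The difference between the two checking macros, sketched (assumption: simplified from HotSpot's utilities/debug.hpp):

    #include <cstdio>
    #include <cstdlib>

    // guarantee() is checked in every build flavor.
    #define my_guarantee(p, msg)                                              \
        do {                                                                  \
            if (!(p)) {                                                       \
                std::fprintf(stderr, "guarantee(%s) failed: %s\n", #p, msg);  \
                std::abort();                                                 \
            }                                                                 \
        } while (0)

    // assert() only exists in debug builds; product builds compile it away.
    #ifdef ASSERT
      #define my_assert(p, msg) my_guarantee(p, msg)
    #else
      #define my_assert(p, msg)
    #endif

    int main() {
        my_assert(1 + 1 == 2, "checked only with -DASSERT");
        my_guarantee(1 + 1 == 2, "always checked");
        return 0;
    }
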
--- a/src/share/vm/asm/codeBuffer.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/asm/codeBuffer.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -362,10 +362,8 @@
   // helper for CodeBuffer::expand()
   void take_over_code_from(CodeBuffer* cs);
 
-#ifdef ASSERT
   // ensure sections are disjoint, ordered, and contained in the blob
-  bool verify_section_allocation();
-#endif
+  void verify_section_allocation();
 
   // copies combined relocations to the blob, returns bytes copied
   // (if target is null, it is a dry run only, just for sizing)
@@ -393,7 +391,7 @@
     assert(code_start != NULL, "sanity");
     initialize_misc("static buffer");
     initialize(code_start, code_size);
-    assert(verify_section_allocation(), "initial use of buffer OK");
+    verify_section_allocation();
   }
 
   // (2) CodeBuffer referring to pre-allocated CodeBlob.
@@ -545,6 +543,9 @@
 
   void block_comment(intptr_t offset, const char * comment) PRODUCT_RETURN;
 
+  // Log a little info about section usage in the CodeBuffer
+  void log_section_sizes(const char* name);
+
 #ifndef PRODUCT
  public:
   // Printing / Decoding
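
Note: when a compile log is active (xtty != NULL, e.g. with -XX:+LogCompilation), log_section_sizes() emits one <blob> element per buffer with a <sect> line per non-empty section. Illustrative output following the format strings above (blob name and all numbers invented):

    <blob name='MethodHandles adapters' size='32000'>
    <sect index='1' size='11872' free='448'/>
    <sect index='2' size='1024' free='96'/>
    </blob>
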
--- a/src/share/vm/c1/c1_Canonicalizer.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/c1/c1_Canonicalizer.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -51,6 +51,7 @@
 
  public:
   Canonicalizer(Compilation* c, Value x, int bci) : _compilation(c), _canonical(x), _bci(bci) {
+    NOT_PRODUCT(x->set_printable_bci(bci));
     if (CanonicalizeNodes) x->visit(this);
   }
   Value canonical() const                        { return _canonical; }
--- a/src/share/vm/c1/c1_Runtime1.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -681,6 +681,23 @@
   }
 JRT_END
 
+// Cf. OptoRuntime::deoptimize_caller_frame
+JRT_ENTRY(void, Runtime1::deoptimize(JavaThread* thread))
+  // Called from within the owner thread, so no need for safepoint
+  RegisterMap reg_map(thread, false);
+  frame stub_frame = thread->last_frame();
+  assert(stub_frame.is_runtime_frame(), "sanity check");
+  frame caller_frame = stub_frame.sender(&reg_map);
+
+  // We are coming from a compiled method; check this is true.
+  assert(CodeCache::find_nmethod(caller_frame.pc()) != NULL, "sanity");
+
+  // Deoptimize the caller frame.
+  Deoptimization::deoptimize_frame(thread, caller_frame.id());
+
+  // Return to the now deoptimized frame.
+JRT_END
+
 
 static klassOop resolve_field_return_klass(methodHandle caller, int bci, TRAPS) {
   Bytecode_field field_access(caller, bci);
--- a/src/share/vm/c1/c1_Runtime1.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/c1/c1_Runtime1.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -63,6 +63,7 @@
   stub(monitorenter_nofpu)             /* optimized version that does not preserve fpu registers */ \
   stub(monitorexit)                  \
   stub(monitorexit_nofpu)              /* optimized version that does not preserve fpu registers */ \
+  stub(deoptimize)                   \
   stub(access_field_patching)        \
   stub(load_klass_patching)          \
   stub(g1_pre_barrier_slow)          \
@@ -152,6 +153,8 @@
   static void monitorenter(JavaThread* thread, oopDesc* obj, BasicObjectLock* lock);
   static void monitorexit (JavaThread* thread, BasicObjectLock* lock);
 
+  static void deoptimize(JavaThread* thread);
+
   static int access_field_patching(JavaThread* thread);
   static int move_klass_patching(JavaThread* thread);
 
--- a/src/share/vm/ci/ciMethodHandle.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/ci/ciMethodHandle.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -86,12 +86,12 @@
 }
 
 
-#ifndef PRODUCT
+#ifdef ASSERT
 // ------------------------------------------------------------------
 // ciMethodHandle::print_chain_impl
 //
 // Implementation of the print method.
-void ciMethodHandle::print_chain_impl(outputStream* st) {
+void ciMethodHandle::print_chain_impl() {
   ASSERT_IN_VM;
   MethodHandleChain::print(get_oop());
 }
@@ -101,7 +101,7 @@
 // ciMethodHandle::print_chain
 //
 // Implementation of the print_chain method.
-void ciMethodHandle::print_chain(outputStream* st) {
-  GUARDED_VM_ENTRY(print_chain_impl(st););
+void ciMethodHandle::print_chain() {
+  GUARDED_VM_ENTRY(print_chain_impl(););
 }
 #endif
--- a/src/share/vm/ci/ciMethodHandle.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/ci/ciMethodHandle.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -45,7 +45,7 @@
   ciMethod* get_adapter(     bool is_invokedynamic);
 
 protected:
-  void print_chain_impl(outputStream* st) PRODUCT_RETURN;
+  void print_chain_impl() NOT_DEBUG_RETURN;
 
 public:
   ciMethodHandle(instanceHandle h_i) :
@@ -79,7 +79,7 @@
     return _invokedynamic_adapter;
   }
 
-  void print_chain(outputStream* st = tty) PRODUCT_RETURN;
+  void print_chain() NOT_DEBUG_RETURN;
 };
 
 #endif // SHARE_VM_CI_CIMETHODHANDLE_HPP
--- a/src/share/vm/code/dependencies.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/code/dependencies.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -763,9 +763,14 @@
       // Method m is inherited into ctxk.
       return true;
     if (lm != NULL) {
-      if (!(lm->is_public() || lm->is_protected()))
+      if (!(lm->is_public() || lm->is_protected())) {
         // Method is [package-]private, so the override story is complex.
         return true;  // Must punt the assertion to true.
+      }
+      if (lm->is_static()) {
+        // Static methods don't override non-static so punt
+        return true;
+      }
       if (   !Dependencies::is_concrete_method(lm)
           && !Dependencies::is_concrete_method(m)
           && Klass::cast(lm->method_holder())->is_subtype_of(m->method_holder()))
@@ -1091,9 +1096,11 @@
 }
 
 bool Dependencies::is_concrete_method(methodOop m) {
-  if (m->is_abstract())  return false;
-  // %%% We could treat unexecuted methods as virtually abstract also.
-  // This would require a deoptimization barrier on first execution.
+  // Statics are irrelevant to virtual call sites.
+  if (m->is_static())  return false;
+
+  // We could also return false if m does not yet appear to be
+  // executed, if the VM version supports this distinction also.
   return !m->is_abstract();
 }
 
@@ -1113,7 +1120,7 @@
 
 bool Dependencies::is_concrete_klass(ciInstanceKlass* k) {
   if (k->is_abstract())  return false;
-  // We could return also false if k does not yet appear to be
+  // We could also return false if k does not yet appear to be
   // instantiated, if the VM version supports this distinction also.
   //if (k->is_not_instantiated())  return false;
   return true;
@@ -1123,7 +1130,7 @@
   // Statics are irrelevant to virtual call sites.
   if (m->is_static())  return false;
 
-  // We could return also false if m does not yet appear to be
+  // We could also return false if m does not yet appear to be
   // executed, if the VM version supports this distinction also.
   return !m->is_abstract();
 }
--- a/src/share/vm/compiler/compileBroker.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/compiler/compileBroker.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1722,11 +1722,11 @@
       if (PrintCompilation) {
         const char* reason = ci_env.failure_reason();
         if (compilable == ciEnv::MethodCompilable_not_at_tier) {
-            tty->print_cr("%3d   COMPILE SKIPPED: %s (retry at different tier)", compile_id, reason);
+          tty->print_cr("%4d   COMPILE SKIPPED: %s (retry at different tier)", compile_id, reason);
         } else if (compilable == ciEnv::MethodCompilable_never) {
-          tty->print_cr("%3d   COMPILE SKIPPED: %s (not retryable)", compile_id, reason);
+          tty->print_cr("%4d   COMPILE SKIPPED: %s (not retryable)", compile_id, reason);
         } else if (compilable == ciEnv::MethodCompilable) {
-          tty->print_cr("%3d   COMPILE SKIPPED: %s", compile_id, reason);
+          tty->print_cr("%4d   COMPILE SKIPPED: %s", compile_id, reason);
         }
       }
     } else {
@@ -1743,6 +1743,14 @@
 
   collect_statistics(thread, time, task);
 
+  if (PrintCompilation && PrintCompilation2) {
+    tty->print("%7d ", (int) tty->time_stamp().milliseconds());  // print timestamp
+    tty->print("%4d ", compile_id);    // print compilation number
+    tty->print("%s ", (is_osr ? "%" : " "));
+    int code_size = (task->code() == NULL) ? 0 : task->code()->total_size();
+    tty->print_cr("size: %d time: %d inlined: %d bytes", code_size, time.milliseconds(), task->num_inlined_bytecodes());
+  }
+
   if (compilable == ciEnv::MethodCompilable_never) {
     if (is_osr) {
       method->set_not_osr_compilable();
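
Note: with both -XX:+PrintCompilation and -XX:+PrintCompilation2 set, each finished compile now gets a second line carrying the timestamp (ms), compile id, OSR marker, code size, compile time, and inlined bytecode count. Illustrative output matching the format strings above (numbers invented):

      12345  678   size: 2464 time: 18 inlined: 142 bytes
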
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -50,8 +50,8 @@
 int CompactibleFreeListSpace::_lockRank = Mutex::leaf + 3;
 
 // Defaults are 0 so things will break badly if incorrectly initialized.
-int CompactibleFreeListSpace::IndexSetStart  = 0;
-int CompactibleFreeListSpace::IndexSetStride = 0;
+size_t CompactibleFreeListSpace::IndexSetStart  = 0;
+size_t CompactibleFreeListSpace::IndexSetStride = 0;
 
 size_t MinChunkSize = 0;
 
@@ -62,7 +62,7 @@
   MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) * MinObjAlignment;
 
   assert(IndexSetStart == 0 && IndexSetStride == 0, "already set");
-  IndexSetStart  = (int) MinChunkSize;
+  IndexSetStart  = MinChunkSize;
   IndexSetStride = MinObjAlignment;
 }
 
@@ -250,7 +250,7 @@
 }
 
 void CompactibleFreeListSpace::resetIndexedFreeListArray() {
-  for (int i = 1; i < IndexSetSize; i++) {
+  for (size_t i = 1; i < IndexSetSize; i++) {
     assert(_indexedFreeList[i].size() == (size_t) i,
       "Indexed free list sizes are incorrect");
     _indexedFreeList[i].reset(IndexSetSize);
@@ -337,7 +337,7 @@
 
 size_t CompactibleFreeListSpace::totalCountInIndexedFreeLists() const {
   size_t count = 0;
-  for (int i = (int)MinChunkSize; i < IndexSetSize; i++) {
+  for (size_t i = IndexSetStart; i < IndexSetSize; i++) {
     debug_only(
       ssize_t total_list_count = 0;
       for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
@@ -2200,7 +2200,7 @@
 
 void CompactibleFreeListSpace::clearFLCensus() {
   assert_locked();
-  int i;
+  size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     FreeList *fl = &_indexedFreeList[i];
     fl->set_prevSweep(fl->count());
@@ -2494,7 +2494,7 @@
 
 void CompactibleFreeListSpace::verifyIndexedFreeLists() const {
   size_t i = 0;
-  for (; i < MinChunkSize; i++) {
+  for (; i < IndexSetStart; i++) {
     guarantee(_indexedFreeList[i].head() == NULL, "should be NULL");
   }
   for (; i < IndexSetSize; i++) {
@@ -2507,7 +2507,7 @@
   FreeChunk* tail =  _indexedFreeList[size].tail();
   size_t    num = _indexedFreeList[size].count();
   size_t      n = 0;
-  guarantee(((size >= MinChunkSize) && (size % IndexSetStride == 0)) || fc == NULL,
+  guarantee(((size >= IndexSetStart) && (size % IndexSetStride == 0)) || fc == NULL,
             "Slot should have been empty");
   for (; fc != NULL; fc = fc->next(), n++) {
     guarantee(fc->size() == size, "Size inconsistency");
@@ -2527,7 +2527,7 @@
     "else MIN_TREE_CHUNK_SIZE is wrong");
   assert((IndexSetStride == 2 && IndexSetStart == 4) ||                   // 32-bit
          (IndexSetStride == 1 && IndexSetStart == 3), "just checking");   // 64-bit
-  assert((IndexSetStride != 2) || (MinChunkSize % 2 == 0),
+  assert((IndexSetStride != 2) || (IndexSetStart % 2 == 0),
       "Some for-loops may be incorrectly initialized");
   assert((IndexSetStride != 2) || (IndexSetSize % 2 == 1),
       "For-loops that iterate over IndexSet with stride 2 may be wrong");
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -104,8 +104,8 @@
     SmallForDictionary  = 257,       // size < this then use _indexedFreeList
     IndexSetSize        = SmallForDictionary  // keep this odd-sized
   };
-  static int IndexSetStart;
-  static int IndexSetStride;
+  static size_t IndexSetStart;
+  static size_t IndexSetStride;
 
  private:
   enum FitStrategyOptions {
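
Note: IndexSetStart and IndexSetStride change from int to size_t so the free-list index arithmetic is unsigned end to end; the old mixed signed/unsigned loops were fragile because a signed operand in a comparison is silently converted to unsigned. A small demonstration of the pitfall:

    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t IndexSetSize = 257;
        int i = -1;                                         // a signed index gone wrong
        // In a mixed comparison the int converts to size_t first, so -1
        // becomes SIZE_MAX and "i < IndexSetSize" is false:
        std::printf("%d\n", i < (int)IndexSetSize);         // 1: signed compare, as intended
        std::printf("%d\n", (std::size_t)i < IndexSetSize); // 0: surprising when implicit
        return 0;
    }
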
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1518,6 +1518,7 @@
   size_t _regions_claimed;
   size_t _freed_bytes;
   FreeRegionList* _local_cleanup_list;
+  OldRegionSet* _old_proxy_set;
   HumongousRegionSet* _humongous_proxy_set;
   HRRSCleanupTask* _hrrs_cleanup_task;
   double _claimed_region_time;
@@ -1527,6 +1528,7 @@
   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                              int worker_num,
                              FreeRegionList* local_cleanup_list,
+                             OldRegionSet* old_proxy_set,
                              HumongousRegionSet* humongous_proxy_set,
                              HRRSCleanupTask* hrrs_cleanup_task);
   size_t freed_bytes() { return _freed_bytes; }
@@ -1557,9 +1559,11 @@
   void work(int i) {
     double start = os::elapsedTime();
     FreeRegionList local_cleanup_list("Local Cleanup List");
+    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
     HRRSCleanupTask hrrs_cleanup_task;
     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
+                                           &old_proxy_set,
                                            &humongous_proxy_set,
                                            &hrrs_cleanup_task);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
@@ -1573,6 +1577,7 @@
     // Now update the lists
     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                             NULL /* free_list */,
+                                            &old_proxy_set,
                                             &humongous_proxy_set,
                                             true /* par */);
     {
@@ -1643,6 +1648,7 @@
 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                            int worker_num,
                            FreeRegionList* local_cleanup_list,
+                           OldRegionSet* old_proxy_set,
                            HumongousRegionSet* humongous_proxy_set,
                            HRRSCleanupTask* hrrs_cleanup_task)
   : _g1(g1), _worker_num(worker_num),
@@ -1650,6 +1656,7 @@
     _freed_bytes(0),
     _claimed_region_time(0.0), _max_region_time(0.0),
     _local_cleanup_list(local_cleanup_list),
+    _old_proxy_set(old_proxy_set),
     _humongous_proxy_set(humongous_proxy_set),
     _hrrs_cleanup_task(hrrs_cleanup_task) { }
 
@@ -1665,6 +1672,7 @@
     _g1->free_region_if_empty(hr,
                               &_freed_bytes,
                               _local_cleanup_list,
+                              _old_proxy_set,
                               _humongous_proxy_set,
                               _hrrs_cleanup_task,
                               true /* par */);
@@ -1689,6 +1697,7 @@
     return;
   }
 
+  HRSPhaseSetter x(HRSPhaseCleanup);
   g1h->verify_region_sets_optional();
 
   if (VerifyDuringGC) {
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1203,6 +1203,7 @@
     Universe::print_heap_before_gc();
   }
 
+  HRSPhaseSetter x(HRSPhaseFullGC);
   verify_region_sets_optional();
 
   const bool do_clear_all_soft_refs = clear_all_soft_refs ||
@@ -1263,7 +1264,6 @@
     release_mutator_alloc_region();
     abandon_gc_alloc_regions();
     g1_rem_set()->cleanupHRRS();
-    tear_down_region_lists();
 
     // We should call this after we retire any currently active alloc
     // regions so that all the ALLOC / RETIRE events are generated
@@ -1278,7 +1278,7 @@
     g1_policy()->clear_incremental_cset();
     g1_policy()->stop_incremental_cset_building();
 
-    empty_young_list();
+    tear_down_region_sets(false /* free_list_only */);
     g1_policy()->set_full_young_gcs(true);
 
     // See the comments in g1CollectedHeap.hpp and
@@ -1301,9 +1301,7 @@
     }
 
     assert(free_regions() == 0, "we should not have added any free regions");
-    rebuild_region_lists();
-
-    _summary_bytes_used = recalculate_used();
+    rebuild_region_sets(false /* free_list_only */);
 
     // Enqueue any discovered reference objects that have
     // not been removed from the discovered lists.
@@ -1764,9 +1762,9 @@
   // Instead of tearing down / rebuilding the free lists here, we
   // could instead use the remove_all_pending() method on free_list to
   // remove only the ones that we need to remove.
-  tear_down_region_lists();  // We will rebuild them in a moment.
+  tear_down_region_sets(true /* free_list_only */);
   shrink_helper(shrink_bytes);
-  rebuild_region_lists();
+  rebuild_region_sets(true /* free_list_only */);
 
   _hrs.verify_optional();
   verify_region_sets_optional();
@@ -1799,6 +1797,7 @@
   _full_collection(false),
   _free_list("Master Free List"),
   _secondary_free_list("Secondary Free List"),
+  _old_set("Old Set"),
   _humongous_set("Master Humongous Set"),
   _free_regions_coming(false),
   _young_list(new YoungList(this)),
@@ -3007,7 +3006,10 @@
 
     if (failures) {
       gclog_or_tty->print_cr("Heap:");
-      print_on(gclog_or_tty, true /* extended */);
+      // It helps to have the per-region information in the output to
+      // help us track down what went wrong. This is why we call
+      // print_extended_on() instead of print_on().
+      print_extended_on(gclog_or_tty);
       gclog_or_tty->print_cr("");
 #ifndef PRODUCT
       if (VerifyDuringGC && G1VerifyDuringGCPrintReachable) {
@@ -3033,13 +3035,7 @@
   }
 };
 
-void G1CollectedHeap::print() const { print_on(tty); }
-
 void G1CollectedHeap::print_on(outputStream* st) const {
-  print_on(st, PrintHeapAtGCExtended);
-}
-
-void G1CollectedHeap::print_on(outputStream* st, bool extended) const {
   st->print(" %-20s", "garbage-first heap");
   st->print(" total " SIZE_FORMAT "K, used " SIZE_FORMAT "K",
             capacity()/K, used_unlocked()/K);
@@ -3057,13 +3053,14 @@
             survivor_regions, survivor_regions * HeapRegion::GrainBytes / K);
   st->cr();
   perm()->as_gen()->print_on(st);
-  if (extended) {
-    st->cr();
-    print_on_extended(st);
-  }
-}
-
-void G1CollectedHeap::print_on_extended(outputStream* st) const {
+}
+
+void G1CollectedHeap::print_extended_on(outputStream* st) const {
+  print_on(st);
+
+  // Print the per-region information.
+  st->cr();
+  st->print_cr("Heap Regions: (Y=young(eden), SU=young(survivor), HS=humongous(starts), HC=humongous(continues), CS=collection set, F=free, TS=gc time stamp, PTAMS=previous top-at-mark-start, NTAMS=next top-at-mark-start)");
   PrintRegionClosure blk(st);
   heap_region_iterate(&blk);
 }
@@ -3352,6 +3349,7 @@
     Universe::print_heap_before_gc();
   }
 
+  HRSPhaseSetter x(HRSPhaseEvacuation);
   verify_region_sets_optional();
   verify_dirty_young_regions();
 
@@ -3774,6 +3772,11 @@
       !retained_region->is_empty() &&
       !retained_region->isHumongous()) {
     retained_region->set_saved_mark();
+    // The retained region was added to the old region set when it was
+    // retired. We have to remove it now, since we don't allow regions
+    // we allocate to in the region sets. We'll re-add it later, when
+    // it's retired again.
+    _old_set.remove(retained_region);
     _old_gc_alloc_region.set(retained_region);
     _hr_printer.reuse(retained_region);
   }
@@ -5338,6 +5341,7 @@
 void G1CollectedHeap::free_region_if_empty(HeapRegion* hr,
                                      size_t* pre_used,
                                      FreeRegionList* free_list,
+                                     OldRegionSet* old_proxy_set,
                                      HumongousRegionSet* humongous_proxy_set,
                                      HRRSCleanupTask* hrrs_cleanup_task,
                                      bool par) {
@@ -5346,6 +5350,7 @@
       assert(hr->startsHumongous(), "we should only see starts humongous");
       free_humongous_region(hr, pre_used, free_list, humongous_proxy_set, par);
     } else {
+      _old_set.remove_with_proxy(hr, old_proxy_set);
       free_region(hr, pre_used, free_list, par);
     }
   } else {
@@ -5402,6 +5407,7 @@
 
 void G1CollectedHeap::update_sets_after_freeing_regions(size_t pre_used,
                                        FreeRegionList* free_list,
+                                       OldRegionSet* old_proxy_set,
                                        HumongousRegionSet* humongous_proxy_set,
                                        bool par) {
   if (pre_used > 0) {
@@ -5417,6 +5423,10 @@
     MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
     _free_list.add_as_head(free_list);
   }
+  if (old_proxy_set != NULL && !old_proxy_set->is_empty()) {
+    MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
+    _old_set.update_from_proxy(old_proxy_set);
+  }
   if (humongous_proxy_set != NULL && !humongous_proxy_set->is_empty()) {
     MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag);
     _humongous_set.update_from_proxy(humongous_proxy_set);
@@ -5614,6 +5624,8 @@
         cur->set_young_index_in_cset(-1);
       cur->set_not_young();
       cur->set_evacuation_failed(false);
+      // The region is now considered to be old.
+      _old_set.add(cur);
     }
     cur = next;
   }
@@ -5629,6 +5641,7 @@
     young_time_ms += elapsed_ms;
 
   update_sets_after_freeing_regions(pre_used, &local_free_list,
+                                    NULL /* old_proxy_set */,
                                     NULL /* humongous_proxy_set */,
                                     false /* par */);
   policy->record_young_free_cset_time_ms(young_time_ms);
@@ -5740,52 +5753,106 @@
   return ret;
 }
 
-void G1CollectedHeap::empty_young_list() {
-  assert(heap_lock_held_for_gc(),
-              "the heap lock should already be held by or for this thread");
-
-  _young_list->empty_list();
-}
-
-// Done at the start of full GC.
-void G1CollectedHeap::tear_down_region_lists() {
-  _free_list.remove_all();
-}
-
-class RegionResetter: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  FreeRegionList _local_free_list;
+class TearDownRegionSetsClosure : public HeapRegionClosure {
+private:
+  OldRegionSet *_old_set;
 
 public:
-  RegionResetter() : _g1h(G1CollectedHeap::heap()),
-                     _local_free_list("Local Free List for RegionResetter") { }
+  TearDownRegionSetsClosure(OldRegionSet* old_set) : _old_set(old_set) { }
 
   bool doHeapRegion(HeapRegion* r) {
-    if (r->continuesHumongous()) return false;
-    if (r->top() > r->bottom()) {
-      if (r->top() < r->end()) {
-        Copy::fill_to_words(r->top(),
-                          pointer_delta(r->end(), r->top()));
-      }
+    if (r->is_empty()) {
+      // We ignore empty regions, we'll empty the free list afterwards
+    } else if (r->is_young()) {
+      // We ignore young regions, we'll empty the young list afterwards
+    } else if (r->isHumongous()) {
+      // We ignore humongous regions, we're not tearing down the
+      // humongous region set
     } else {
-      assert(r->is_empty(), "tautology");
-      _local_free_list.add_as_tail(r);
+      // The rest should be old
+      _old_set->remove(r);
     }
     return false;
   }
 
-  void update_free_lists() {
-    _g1h->update_sets_after_freeing_regions(0, &_local_free_list, NULL,
-                                            false /* par */);
+  ~TearDownRegionSetsClosure() {
+    assert(_old_set->is_empty(), "post-condition");
   }
 };
 
-// Done at the end of full GC.
-void G1CollectedHeap::rebuild_region_lists() {
-  // This needs to go at the end of the full GC.
-  RegionResetter rs;
-  heap_region_iterate(&rs);
-  rs.update_free_lists();
+void G1CollectedHeap::tear_down_region_sets(bool free_list_only) {
+  assert_at_safepoint(true /* should_be_vm_thread */);
+
+  if (!free_list_only) {
+    TearDownRegionSetsClosure cl(&_old_set);
+    heap_region_iterate(&cl);
+
+    // Need to do this after the heap iteration to be able to
+    // recognize the young regions and ignore them during the iteration.
+    _young_list->empty_list();
+  }
+  _free_list.remove_all();
+}
+
+class RebuildRegionSetsClosure : public HeapRegionClosure {
+private:
+  bool            _free_list_only;
+  OldRegionSet*   _old_set;
+  FreeRegionList* _free_list;
+  size_t          _total_used;
+
+public:
+  RebuildRegionSetsClosure(bool free_list_only,
+                           OldRegionSet* old_set, FreeRegionList* free_list) :
+    _free_list_only(free_list_only),
+    _old_set(old_set), _free_list(free_list), _total_used(0) {
+    assert(_free_list->is_empty(), "pre-condition");
+    if (!free_list_only) {
+      assert(_old_set->is_empty(), "pre-condition");
+    }
+  }
+
+  bool doHeapRegion(HeapRegion* r) {
+    if (r->continuesHumongous()) {
+      return false;
+    }
+
+    if (r->is_empty()) {
+      // Add free regions to the free list
+      _free_list->add_as_tail(r);
+    } else if (!_free_list_only) {
+      assert(!r->is_young(), "we should not come across young regions");
+
+      if (r->isHumongous()) {
+        // We ignore humongous regions, we left the humongous set unchanged
+      } else {
+        // The rest should be old, add them to the old set
+        _old_set->add(r);
+      }
+      _total_used += r->used();
+    }
+
+    return false;
+  }
+
+  size_t total_used() {
+    return _total_used;
+  }
+};
+
+void G1CollectedHeap::rebuild_region_sets(bool free_list_only) {
+  assert_at_safepoint(true /* should_be_vm_thread */);
+
+  RebuildRegionSetsClosure cl(free_list_only, &_old_set, &_free_list);
+  heap_region_iterate(&cl);
+
+  if (!free_list_only) {
+    _summary_bytes_used = cl.total_used();
+  }
+  assert(_summary_bytes_used == recalculate_used(),
+         err_msg("inconsistent _summary_bytes_used, "
+                 "value: "SIZE_FORMAT" recalculated: "SIZE_FORMAT,
+                 _summary_bytes_used, recalculate_used()));
 }
 
 void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) {
@@ -5882,6 +5949,8 @@
   g1_policy()->record_bytes_copied_during_gc(allocated_bytes);
   if (ap == GCAllocForSurvived) {
     young_list()->add_survivor_region(alloc_region);
+  } else {
+    _old_set.add(alloc_region);
   }
   _hr_printer.retire(alloc_region);
 }
@@ -5913,15 +5982,17 @@
 
 class VerifyRegionListsClosure : public HeapRegionClosure {
 private:
+  FreeRegionList*     _free_list;
+  OldRegionSet*       _old_set;
   HumongousRegionSet* _humongous_set;
-  FreeRegionList*     _free_list;
   size_t              _region_count;
 
 public:
-  VerifyRegionListsClosure(HumongousRegionSet* humongous_set,
+  VerifyRegionListsClosure(OldRegionSet* old_set,
+                           HumongousRegionSet* humongous_set,
                            FreeRegionList* free_list) :
-    _humongous_set(humongous_set), _free_list(free_list),
-    _region_count(0) { }
+    _old_set(old_set), _humongous_set(humongous_set),
+    _free_list(free_list), _region_count(0) { }
 
   size_t region_count()      { return _region_count;      }
 
@@ -5938,6 +6009,8 @@
       _humongous_set->verify_next_region(hr);
     } else if (hr->is_empty()) {
       _free_list->verify_next_region(hr);
+    } else {
+      _old_set->verify_next_region(hr);
     }
     return false;
   }
@@ -5964,6 +6037,7 @@
     MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
     _secondary_free_list.verify();
   }
+  _old_set.verify();
   _humongous_set.verify();
 
   // If a concurrent region freeing operation is in progress it will
@@ -5987,12 +6061,14 @@
 
   // Finally, make sure that the region accounting in the lists is
   // consistent with what we see in the heap.
+  _old_set.verify_start();
   _humongous_set.verify_start();
   _free_list.verify_start();
 
-  VerifyRegionListsClosure cl(&_humongous_set, &_free_list);
+  VerifyRegionListsClosure cl(&_old_set, &_humongous_set, &_free_list);
   heap_region_iterate(&cl);
 
+  _old_set.verify_end();
   _humongous_set.verify_end();
   _free_list.verify_end();
 }
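
Note: most of this change threads an OldRegionSet through the cleanup and free paths alongside the existing humongous set. The "proxy set" idiom keeps the per-worker hot path lock-free: each worker collects its region updates in a thread-local proxy and reconciles it with the master set in one short critical section under OldSets_lock. A hedged sketch of the locking shape only (minimal hypothetical types, not G1's real remove/merge accounting):

    #include <mutex>
    #include <vector>

    struct HeapRegion { /* ... */ };

    struct RegionSet {
        std::vector<HeapRegion*> regions;
        void update_from_proxy(RegionSet* proxy) {   // same name as in the diff
            regions.insert(regions.end(),
                           proxy->regions.begin(), proxy->regions.end());
            proxy->regions.clear();
        }
    };

    static RegionSet  master_old_set;
    static std::mutex OldSets_lock;

    // One GC worker's cleanup: fill a local proxy without any locking,
    // then publish it to the master set in a single locked step.
    void worker_cleanup(const std::vector<HeapRegion*>& old_regions) {
        RegionSet old_proxy;
        for (HeapRegion* r : old_regions)
            old_proxy.regions.push_back(r);
        std::lock_guard<std::mutex> x(OldSets_lock);
        master_old_set.update_from_proxy(&old_proxy);
    }

    int main() { worker_cleanup({}); return 0; }
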
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -239,6 +239,9 @@
   // master free list when appropriate.
   SecondaryFreeRegionList   _secondary_free_list;
 
+  // It keeps track of the old regions.
+  MasterOldRegionSet        _old_set;
+
   // It keeps track of the humongous regions.
   MasterHumongousRegionSet  _humongous_set;
 
@@ -248,10 +251,21 @@
   // The block offset table for the G1 heap.
   G1BlockOffsetSharedArray* _bot_shared;
 
-  // Move all of the regions off the free lists, then rebuild those free
-  // lists, before and after full GC.
-  void tear_down_region_lists();
-  void rebuild_region_lists();
+  // Tears down the region sets / lists so that they are empty and the
+  // regions on the heap do not belong to a region set / list. The
+  // only exception is the humongous set which we leave unaltered. If
+  // free_list_only is true, it will only tear down the master free
+  // list. It is called before a Full GC (free_list_only == false) or
+  // before heap shrinking (free_list_only == true).
+  void tear_down_region_sets(bool free_list_only);
+
+  // Rebuilds the region sets / lists so that they are repopulated to
+  // reflect the contents of the heap. The only exception is the
+  // humongous set which was not torn down in the first place. If
+  // free_list_only is true, it will only rebuild the master free
+  // list. It is called after a Full GC (free_list_only == false) or
+  // after heap shrinking (free_list_only == true).
+  void rebuild_region_sets(bool free_list_only);
 
   // The sequence of all heap regions in the heap.
   HeapRegionSeq _hrs;
@@ -1124,6 +1138,10 @@
     }
   }
 
+  void old_set_remove(HeapRegion* hr) {
+    _old_set.remove(hr);
+  }
+
   void set_free_regions_coming();
   void reset_free_regions_coming();
   bool free_regions_coming() { return _free_regions_coming; }
@@ -1153,6 +1171,7 @@
   void free_region_if_empty(HeapRegion* hr,
                             size_t* pre_used,
                             FreeRegionList* free_list,
+                            OldRegionSet* old_proxy_set,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task,
                             bool par);
@@ -1163,6 +1182,7 @@
   // (if par is true, it will do so by taking the ParGCRareEvent_lock).
   void update_sets_after_freeing_regions(size_t pre_used,
                                        FreeRegionList* free_list,
+                                       OldRegionSet* old_proxy_set,
                                        HumongousRegionSet* humongous_proxy_set,
                                        bool par);
 
@@ -1429,14 +1449,8 @@
 
   // Override; it uses the "prev" marking information
   virtual void verify(bool allow_dirty, bool silent);
-  // Default behavior by calling print(tty);
-  virtual void print() const;
-  // This calls print_on(st, PrintHeapAtGCExtended).
   virtual void print_on(outputStream* st) const;
-  // If extended is true, it will print out information for all
-  // regions in the heap by calling print_on_extended(st).
-  virtual void print_on(outputStream* st, bool extended) const;
-  virtual void print_on_extended(outputStream* st) const;
+  virtual void print_extended_on(outputStream* st) const;
 
   virtual void print_gc_threads_on(outputStream* st) const;
   virtual void gc_threads_do(ThreadClosure* tc) const;
@@ -1452,8 +1466,6 @@
   // asserted to be this type.
   static G1CollectedHeap* heap();
 
-  void empty_young_list();
-
   void set_region_short_lived_locked(HeapRegion* hr);
   // add appropriate methods for any other surv rate groups
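
(Aside: the tear_down_region_sets() / rebuild_region_sets() pair documented above is meant to bracket the operation that invalidates the sets. A sketch of the expected call shape, not the actual call sites:)

// Full GC: dismantle everything except the humongous set, rebuild after.
tear_down_region_sets(false /* free_list_only */);
// ... compact the heap ...
rebuild_region_sets(false /* free_list_only */);

// Heap shrinking: only the master free list needs the treatment.
tear_down_region_sets(true /* free_list_only */);
// ... uncommit regions ...
rebuild_region_sets(true /* free_list_only */);
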
 
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -3015,6 +3015,7 @@
       hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
                                                       avg_prediction);
       if (hr != NULL) {
+        _g1->old_set_remove(hr);
         double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
         time_remaining_ms -= predicted_time_ms;
         predicted_pause_time_ms += predicted_time_ms;
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -236,6 +236,7 @@
     // at the end of the GC, so no point in updating those values here.
     _g1h->update_sets_after_freeing_regions(0, /* pre_used */
                                             NULL, /* free_list */
+                                            NULL, /* old_proxy_set */
                                             &_humongous_proxy_set,
                                             false /* par */);
   }
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -635,10 +635,18 @@
   ct_freq_note_card(_ct_bs->index_for(start));
 #endif
 
-  assert(!check_for_refs_into_cset || _cset_rs_update_cl[worker_i] != NULL, "sanity");
+  OopsInHeapRegionClosure* oops_in_heap_closure = NULL;
+  if (check_for_refs_into_cset) {
+    // ConcurrentG1RefineThreads have worker numbers larger than what
+    // _cset_rs_update_cl[] is set up to handle. But those threads should
+    // only be active outside of a collection, which means that when they
+    // reach here they should have check_for_refs_into_cset == false.
+    assert((size_t)worker_i < n_workers(), "index of worker larger than _cset_rs_update_cl[].length");
+    oops_in_heap_closure = _cset_rs_update_cl[worker_i];
+  }
   UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
                                                _g1->g1_rem_set(),
-                                               _cset_rs_update_cl[worker_i],
+                                               oops_in_heap_closure,
                                                check_for_refs_into_cset,
                                                worker_i);
   update_rs_oop_cl.set_from(r);
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -722,7 +722,7 @@
     st->print(" F");
   else
     st->print("  ");
-  st->print(" %5d", _gc_time_stamp);
+  st->print(" TS %5d", _gc_time_stamp);
   st->print(" PTAMS "PTR_FORMAT" NTAMS "PTR_FORMAT,
             prev_top_at_mark_start(), next_top_at_mark_start());
   G1OffsetTableContigSpace::print_on(st);
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -26,6 +26,7 @@
 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
 
 size_t HeapRegionSetBase::_unrealistically_long_length = 0;
+HRSPhase HeapRegionSetBase::_phase = HRSPhaseNone;
 
 //////////////////// HeapRegionSetBase ////////////////////
 
@@ -192,6 +193,17 @@
   _verify_in_progress = false;
 }
 
+void HeapRegionSetBase::clear_phase() {
+  assert(_phase != HRSPhaseNone, "pre-condition");
+  _phase = HRSPhaseNone;
+}
+
+void HeapRegionSetBase::set_phase(HRSPhase phase) {
+  assert(_phase == HRSPhaseNone, "pre-condition");
+  assert(phase != HRSPhaseNone, "pre-condition");
+  _phase = phase;
+}
+
 void HeapRegionSetBase::print_on(outputStream* out, bool print_contents) {
   out->cr();
   out->print_cr("Set: %s ("PTR_FORMAT")", name(), this);
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -47,8 +47,18 @@
 
 class hrs_ext_msg;
 
+typedef enum {
+  HRSPhaseNone,
+  HRSPhaseEvacuation,
+  HRSPhaseCleanup,
+  HRSPhaseFullGC
+} HRSPhase;
+
+class HRSPhaseSetter;
+
 class HeapRegionSetBase VALUE_OBJ_CLASS_SPEC {
   friend class hrs_ext_msg;
+  friend class HRSPhaseSetter;
 
 protected:
   static size_t calculate_region_num(HeapRegion* hr);
@@ -80,6 +90,15 @@
   size_t      _calc_total_capacity_bytes;
   size_t      _calc_total_used_bytes;
 
+  // This is here so that it can be used in the subclasses to assert
+  // something different depending on which phase the GC is in. This
+  // can be particularly helpful in the check_mt_safety() methods.
+  static HRSPhase _phase;
+
+  // Only used by HRSPhaseSetter.
+  static void clear_phase();
+  static void set_phase(HRSPhase phase);
+
   // verify_region() is used to ensure that the contents of a region
   // added to / removed from a set are consistent. Different sets
   // make different assumptions about the regions added to them. So
@@ -177,6 +196,16 @@
   }
 };
 
+class HRSPhaseSetter {
+public:
+  HRSPhaseSetter(HRSPhase phase) {
+    HeapRegionSetBase::set_phase(phase);
+  }
+  ~HRSPhaseSetter() {
+    HeapRegionSetBase::clear_phase();
+  }
+};
+
 // These two macros are provided for convenience, to keep the uses of
 // these two asserts a bit more concise.
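
(Aside: HRSPhaseSetter is a scoped, RAII-style setter: the constructor publishes the phase and the destructor restores HRSPhaseNone, so _phase cannot be left stale on any exit path. A usage sketch, with the surrounding pause code elided:)

{
  // While this scope is active, check_mt_safety() in the master sets
  // may accept the OldSets_lock as proof of safety.
  HRSPhaseSetter x(HRSPhaseCleanup);
  // ... update the region sets during the cleanup pause ...
} // destructor resets _phase to HRSPhaseNone here
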
 
--- a/src/share/vm/gc_implementation/g1/heapRegionSets.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegionSets.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -26,6 +26,17 @@
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSets.hpp"
 
+// Note on the check_mt_safety() methods below:
+//
+// Verification of the "master" heap region sets / lists that are
+// maintained by G1CollectedHeap is always done during a STW pause and
+// by the VM thread at the start / end of the pause. The standard
+// verification methods all assert check_mt_safety(). This is
+// important as it ensures that verification is done without
+// concurrent updates taking place. It follows that, for the
+// "master" heap region sets / lists, the check_mt_safety()
+// method should include the VM thread / STW case.
+
 //////////////////// FreeRegionList ////////////////////
 
 const char* FreeRegionList::verify_region_extra(HeapRegion* hr) {
@@ -33,7 +44,7 @@
     return "the region should not be young";
   }
   // The superclass will check that the region is empty and
-  // not-humongous.
+  // not humongous.
   return HeapRegionLinkedList::verify_region_extra(hr);
 }
 
@@ -58,12 +69,16 @@
   // (b) If we're not at a safepoint, operations on the master free
   // list should be invoked while holding the Heap_lock.
 
-  guarantee((SafepointSynchronize::is_at_safepoint() &&
-               (Thread::current()->is_VM_thread() ||
-                                            FreeList_lock->owned_by_self())) ||
-            (!SafepointSynchronize::is_at_safepoint() &&
-                                                Heap_lock->owned_by_self()),
-            hrs_ext_msg(this, "master free list MT safety protocol"));
+  if (SafepointSynchronize::is_at_safepoint()) {
+    guarantee(Thread::current()->is_VM_thread() ||
+              FreeList_lock->owned_by_self(),
+              hrs_ext_msg(this, "master free list MT safety protocol "
+                                "at a safepoint"));
+  } else {
+    guarantee(Heap_lock->owned_by_self(),
+              hrs_ext_msg(this, "master free list MT safety protocol "
+                                "outside a safepoint"));
+  }
 
   return FreeRegionList::check_mt_safety();
 }
@@ -81,6 +96,48 @@
   return FreeRegionList::check_mt_safety();
 }
 
+//////////////////// OldRegionSet ////////////////////
+
+const char* OldRegionSet::verify_region_extra(HeapRegion* hr) {
+  if (hr->is_young()) {
+    return "the region should not be young";
+  }
+  // The superclass will check that the region is not empty and not
+  // humongous.
+  return HeapRegionSet::verify_region_extra(hr);
+}
+
+//////////////////// MasterOldRegionSet ////////////////////
+
+bool MasterOldRegionSet::check_mt_safety() {
+  // Master Old Set MT safety protocol:
+  // (a) If we're at a safepoint, operations on the master old set
+  // should be invoked:
+  // - by the VM thread (which will serialize them), or
+  // - by the GC workers while holding the FreeList_lock, if we're
+  //   at a safepoint for an evacuation pause (this lock is taken
+  //   anyway when a GC alloc region is retired so that a new one
+  //   is allocated from the free list), or
+  // - by the GC workers while holding the OldSets_lock, if we're at a
+  //   safepoint for a cleanup pause.
+  // (b) If we're not at a safepoint, operations on the master old set
+  // should be invoked while holding the Heap_lock.
+
+  if (SafepointSynchronize::is_at_safepoint()) {
+    guarantee(Thread::current()->is_VM_thread() ||
+              (_phase == HRSPhaseEvacuation && FreeList_lock->owned_by_self()) ||
+              (_phase == HRSPhaseCleanup && OldSets_lock->owned_by_self()),
+              hrs_ext_msg(this, "master old set MT safety protocol "
+                                "at a safepoint"));
+  } else {
+    guarantee(Heap_lock->owned_by_self(),
+              hrs_ext_msg(this, "master old set MT safety protocol "
+                                "outside a safepoint"));
+  }
+
+  return OldRegionSet::check_mt_safety();
+}
+
 //////////////////// HumongousRegionSet ////////////////////
 
 const char* HumongousRegionSet::verify_region_extra(HeapRegion* hr) {
@@ -103,11 +160,16 @@
   // (b) If we're not at a safepoint, operations on the master
   // humongous set should be invoked while holding the Heap_lock.
 
-  guarantee((SafepointSynchronize::is_at_safepoint() &&
-               (Thread::current()->is_VM_thread() ||
-                                             OldSets_lock->owned_by_self())) ||
-            (!SafepointSynchronize::is_at_safepoint() &&
-                                                 Heap_lock->owned_by_self()),
-            hrs_ext_msg(this, "master humongous set MT safety protocol"));
+  if (SafepointSynchronize::is_at_safepoint()) {
+    guarantee(Thread::current()->is_VM_thread() ||
+              OldSets_lock->owned_by_self(),
+              hrs_ext_msg(this, "master humongous set MT safety protocol "
+                                "at a safepoint"));
+  } else {
+    guarantee(Heap_lock->owned_by_self(),
+              hrs_ext_msg(this, "master humongous set MT safety protocol "
+                                "outside a safepoint"));
+  }
+
   return HumongousRegionSet::check_mt_safety();
 }
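
(Aside: a GC worker that updates the master old set during an evacuation pause satisfies the protocol above by holding the FreeList_lock, which it takes anyway when retiring a GC alloc region. A sketch of the locking shape, assuming the MutexLockerEx idiom used earlier in this changeset:)

{
  // Sketch only: worker thread at an evacuation-pause safepoint.
  MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag);
  _old_set.add(alloc_region); // check_mt_safety() now passes
}
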
--- a/src/share/vm/gc_implementation/g1/heapRegionSets.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegionSets.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -61,6 +61,30 @@
   SecondaryFreeRegionList(const char* name) : FreeRegionList(name) { }
 };
 
+//////////////////// OldRegionSet ////////////////////
+
+class OldRegionSet : public HeapRegionSet {
+protected:
+  virtual const char* verify_region_extra(HeapRegion* hr);
+
+  virtual bool regions_humongous() { return false; }
+  virtual bool regions_empty()     { return false; }
+
+public:
+  OldRegionSet(const char* name) : HeapRegionSet(name) { }
+};
+
+//////////////////// MasterOldRegionSet ////////////////////
+
+class MasterOldRegionSet : public OldRegionSet {
+protected:
+  virtual bool check_mt_safety();
+
+public:
+  MasterOldRegionSet(const char* name) : OldRegionSet(name) { }
+};
+
 //////////////////// HumongousRegionSet ////////////////////
 
 class HumongousRegionSet : public HeapRegionSet {
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -863,8 +863,6 @@
   ensure_parsability(false);  // no need to retire TLABs for verification
 }
 
-void ParallelScavengeHeap::print() const { print_on(tty); }
-
 void ParallelScavengeHeap::print_on(outputStream* st) const {
   young_gen()->print_on(st);
   old_gen()->print_on(st);
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -246,8 +246,7 @@
   jlong millis_since_last_gc();
 
   void prepare_for_verify();
-  void print() const;
-  void print_on(outputStream* st) const;
+  virtual void print_on(outputStream* st) const;
   virtual void print_gc_threads_on(outputStream* st) const;
   virtual void gc_threads_do(ThreadClosure* tc) const;
   virtual void print_tracing_info() const;
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -590,13 +590,27 @@
   void pre_full_gc_dump();
   void post_full_gc_dump();
 
-  virtual void print() const = 0;
+  // Print heap information on the given outputStream.
   virtual void print_on(outputStream* st) const = 0;
+  // The default behavior is to call print_on() on tty.
+  virtual void print() const {
+    print_on(tty);
+  }
+  // Print more detailed heap information on the given
+  // outputStream. The default behavior is to call print_on(). It is
+  // up to each subclass to override it and add any additional output
+  // it needs.
+  virtual void print_extended_on(outputStream* st) const {
+    print_on(st);
+  }
 
   // Print all GC threads (other than the VM thread)
   // used by this heap.
   virtual void print_gc_threads_on(outputStream* st) const = 0;
-  void print_gc_threads() { print_gc_threads_on(tty); }
+  // The default behavior is to call print_gc_threads_on() on tty.
+  void print_gc_threads() {
+    print_gc_threads_on(tty);
+  }
   // Iterator for all GC threads (other than VM thread)
   virtual void gc_threads_do(ThreadClosure* tc) const = 0;
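
(Aside: print_extended_on() is a hook with a safe default: the base class forwards to print_on(), and only collectors with more to say override it. A sketch of a subclass override; the heap class and the per-region helper are hypothetical:)

void MyCollectedHeap::print_extended_on(outputStream* st) const {
  print_on(st);              // the regular summary first
  print_all_regions_on(st);  // hypothetical extra per-region output
}
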
 
--- a/src/share/vm/gc_interface/gcCause.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/gc_interface/gcCause.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -42,12 +42,6 @@
     case _jvmti_force_gc:
       return "JvmtiEnv ForceGarbageCollection";
 
-    case _no_gc:
-      return "No GC";
-
-    case _allocation_failure:
-      return "Allocation Failure";
-
     case _gc_locker:
       return "GCLocker Initiated GC";
 
@@ -57,6 +51,12 @@
     case _heap_dump:
       return "Heap Dump Initiated GC";
 
+    case _no_gc:
+      return "No GC";
+
+    case _allocation_failure:
+      return "Allocation Failure";
+
     case _tenured_generation_full:
       return "Tenured Generation Full";
 
@@ -78,6 +78,9 @@
     case _old_generation_too_full_to_scavenge:
       return "Old Generation Too Full To Scavenge";
 
+    case _adaptive_size_policy:
+      return "Ergonomics";
+
     case _g1_inc_collection_pause:
       return "G1 Evacuation Pause";
 
--- a/src/share/vm/interpreter/bytecode.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/interpreter/bytecode.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -234,6 +234,13 @@
                                                           is_invokespecial()   ||
                                                           is_invokedynamic(); }
 
+  bool is_method_handle_invoke() const {
+    return (is_invokedynamic() ||
+            (is_invokevirtual() &&
+             method()->constants()->klass_ref_at_noresolve(index()) == vmSymbols::java_lang_invoke_MethodHandle() &&
+             methodOopDesc::is_method_handle_invoke_name(name())));
+  }
+
   // Helper to skip verification.  Use is_valid() to check if the result is really an invoke
   inline friend Bytecode_invoke Bytecode_invoke_check(methodHandle method, int bci);
 };
--- a/src/share/vm/interpreter/bytecodeTracer.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/interpreter/bytecodeTracer.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -241,7 +241,7 @@
         st->print_cr(" not secondary entry?", i);
         return false;
       }
-      i = cache->entry_at(i)->main_entry_index();
+      i = cache->entry_at(i)->main_entry_index() + constantPoolOopDesc::CPCACHE_INDEX_TAG;
       goto check_cache_index;
     } else {
       st->print_cr(" not in cache[*]?", i);
--- a/src/share/vm/interpreter/interpreterRuntime.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/interpreter/interpreterRuntime.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -549,8 +549,8 @@
 
   if (is_put && !is_static && klass->is_subclass_of(SystemDictionary::CallSite_klass()) && (info.name() == vmSymbols::target_name())) {
     const jint direction = frame::interpreter_frame_expression_stack_direction();
-    oop call_site     = *((oop*) thread->last_frame().interpreter_frame_tos_at(-1 * direction));
-    oop method_handle = *((oop*) thread->last_frame().interpreter_frame_tos_at( 0 * direction));
+    Handle call_site    (THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at(-1 * direction)));
+    Handle method_handle(THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at( 0 * direction)));
     assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
     assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
 
--- a/src/share/vm/libadt/vectset.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/libadt/vectset.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -350,6 +350,21 @@
   return (int)_xor;
 }
 
+//------------------------------iterate----------------------------------------
+// Used by Set::print().
+class VSetI_ : public SetI_ {
+  VectorSetI vsi;
+public:
+  VSetI_( const VectorSet *vset, uint &elem ) : vsi(vset) { elem = vsi.elem; }
+
+  uint next(void) { ++vsi; return vsi.elem; }
+  int  test(void) { return vsi.test(); }
+};
+
+SetI_ *VectorSet::iterate(uint &elem) const {
+  return new(ResourceObj::C_HEAP) VSetI_(this, elem);
+}
+
 //=============================================================================
 //------------------------------next-------------------------------------------
 // Find and return the next element of a vector set, or return garbage and
--- a/src/share/vm/libadt/vectset.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/libadt/vectset.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -151,7 +151,7 @@
 
 
 private:
-  SetI_ *iterate(uint&) const { ShouldNotCallThis(); return NULL; } // Removed
+  SetI_ *iterate(uint&) const;
 };
 
 //------------------------------Iteration--------------------------------------
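
(Aside: with iterate() implemented, the generic Set::print() path works again for VectorSet. Direct iteration keeps the VectorSetI shape that also appears later in this changeset; a sketch, assuming a resource-area arena:)

VectorSet vs(Thread::current()->resource_area());
vs <<= 5;   // insert element 5
vs <<= 42;  // insert element 42
for (VectorSetI i(&vs); i.test(); ++i) {
  tty->print_cr("member: %u", i.elem);
}
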
--- a/src/share/vm/memory/genCollectedHeap.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1270,7 +1270,6 @@
   rem_set()->verify();
 }
 
-void GenCollectedHeap::print() const { print_on(tty); }
 void GenCollectedHeap::print_on(outputStream* st) const {
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->print_on(st);
--- a/src/share/vm/memory/genCollectedHeap.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -360,8 +360,7 @@
   void verify(bool allow_dirty, bool silent, VerifyOption option);
 
   // Override.
-  void print() const;
-  void print_on(outputStream* st) const;
+  virtual void print_on(outputStream* st) const;
   virtual void print_gc_threads_on(outputStream* st) const;
   virtual void gc_threads_do(ThreadClosure* tc) const;
   virtual void print_tracing_info() const;
--- a/src/share/vm/memory/universe.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/memory/universe.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1281,11 +1281,17 @@
   }
 }
 
-void Universe::print() { print_on(gclog_or_tty); }
+void Universe::print() {
+  print_on(gclog_or_tty);
+}
 
-void Universe::print_on(outputStream* st) {
+void Universe::print_on(outputStream* st, bool extended) {
   st->print_cr("Heap");
-  heap()->print_on(st);
+  if (!extended) {
+    heap()->print_on(st);
+  } else {
+    heap()->print_extended_on(st);
+  }
 }
 
 void Universe::print_heap_at_SIGBREAK() {
@@ -1301,14 +1307,22 @@
   st->print_cr("{Heap before GC invocations=%u (full %u):",
                heap()->total_collections(),
                heap()->total_full_collections());
-  heap()->print_on(st);
+  if (!PrintHeapAtGCExtended) {
+    heap()->print_on(st);
+  } else {
+    heap()->print_extended_on(st);
+  }
 }
 
 void Universe::print_heap_after_gc(outputStream* st) {
   st->print_cr("Heap after GC invocations=%u (full %u):",
                heap()->total_collections(),
                heap()->total_full_collections());
-  heap()->print_on(st);
+  if (!PrintHeapAtGCExtended) {
+    heap()->print_on(st);
+  } else {
+    heap()->print_extended_on(st);
+  }
   st->print_cr("}");
 }
 
--- a/src/share/vm/memory/universe.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/memory/universe.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -414,9 +414,13 @@
   static bool verify_in_progress() { return _verify_in_progress; }
   static void verify(bool allow_dirty = true, bool silent = false,
                      VerifyOption option = VerifyOption_Default );
-  static int  verify_count()                  { return _verify_count; }
+  static int  verify_count()       { return _verify_count; }
+  // The default behavior is to call print_on() on gclog_or_tty.
   static void print();
-  static void print_on(outputStream* st);
+  // The extended parameter determines which method on the heap will
+  // be called: print_on() (extended == false) or print_extended_on()
+  // (extended == true).
+  static void print_on(outputStream* st, bool extended = false);
   static void print_heap_at_SIGBREAK();
   static void print_heap_before_gc() { print_heap_before_gc(gclog_or_tty); }
   static void print_heap_after_gc()  { print_heap_after_gc(gclog_or_tty); }
--- a/src/share/vm/oops/arrayOop.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/oops/arrayOop.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -23,9 +23,40 @@
  */
 
 #include "precompiled.hpp"
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+
 #include "oops/arrayOop.hpp"
-#include "oops/objArrayOop.hpp"
-#include "oops/oop.inline.hpp"
-#include "oops/symbol.hpp"
+#include "utilities/globalDefinitions.hpp"
+
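+// Check that the byte size of a maximal array of 'type' survives the
+// conversion to size_t, i.e. that max_array_length() leaves no room
+// for overflow once the header is added.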
+bool arrayOopDesc::check_max_length_overflow(BasicType type) {
+  julong length = max_array_length(type);
+  julong bytes_per_element = type2aelembytes(type);
+  julong bytes = length * bytes_per_element + header_size_in_bytes();
+  return (julong)(size_t)bytes == bytes;
+}
+
+bool arrayOopDesc::test_max_array_length() {
+  tty->print_cr("test_max_array_length");
 
-// <<this page is intentionally left blank>>
+  assert(check_max_length_overflow(T_BOOLEAN), "size_t overflow for boolean array");
+  assert(check_max_length_overflow(T_CHAR), "size_t overflow for char array");
+  assert(check_max_length_overflow(T_FLOAT), "size_t overflow for float array");
+  assert(check_max_length_overflow(T_DOUBLE), "size_t overflow for double array");
+  assert(check_max_length_overflow(T_BYTE), "size_t overflow for byte array");
+  assert(check_max_length_overflow(T_SHORT), "size_t overflow for short array");
+  assert(check_max_length_overflow(T_INT), "size_t overflow for int array");
+  assert(check_max_length_overflow(T_LONG), "size_t overflow for long array");
+  assert(check_max_length_overflow(T_OBJECT), "size_t overflow for object array");
+  assert(check_max_length_overflow(T_ARRAY), "size_t overflow for array array");
+  assert(check_max_length_overflow(T_NARROWOOP), "size_t overflow for narrowOop array");
+
+  // T_VOID and T_ADDRESS are not supported by max_array_length()
+
+  return true;
+}
+
+#endif //PRODUCT
--- a/src/share/vm/oops/arrayOop.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/oops/arrayOop.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -104,20 +104,32 @@
 
   // Return the maximum length of an array of BasicType.  The length can be passed
   // to typeArrayOop::object_size(scale, length, header_size) without causing an
-  // overflow.
+  // overflow. We also need to make sure that this will not overflow a size_t on
+  // 32-bit platforms when we convert it to a byte size.
   static int32_t max_array_length(BasicType type) {
     assert(type >= 0 && type < T_CONFLICT, "wrong type");
     assert(type2aelembytes(type) != 0, "wrong type");
-    const int bytes_per_element = type2aelembytes(type);
-    if (bytes_per_element < HeapWordSize) {
-      return max_jint;
-    }
 
-    const int32_t max_words = align_size_down(max_jint, MinObjAlignment);
-    const int32_t max_element_words = max_words - header_size(type);
-    const int32_t words_per_element = bytes_per_element >> LogHeapWordSize;
-    return max_element_words / words_per_element;
+    const size_t max_element_words_per_size_t =
+      align_size_down((SIZE_MAX/HeapWordSize - header_size(type)), MinObjAlignment);
+    const size_t max_elements_per_size_t =
+      HeapWordSize * max_element_words_per_size_t / type2aelembytes(type);
+    if ((size_t)max_jint < max_elements_per_size_t) {
+      // It should be ok to return max_jint here, but parts of the code
+      // (CollectedHeap, Klass::oop_oop_iterate(), and more) use an int for
+      // passing around the size (in words) of an object. So, we need to avoid
+      // overflowing an int when we add the header. See CRs 4718400 and 7110613.
+      return align_size_down(max_jint - header_size(type), MinObjAlignment);
+    }
+    return (int32_t)max_elements_per_size_t;
   }
+
+// for unit testing
+#ifndef PRODUCT
+  static bool check_max_length_overflow(BasicType type);
+  static int32_t old_max_array_length(BasicType type);
+  static bool test_max_array_length();
+#endif
 };
 
 #endif // SHARE_VM_OOPS_ARRAYOOP_HPP
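
(Aside: the overflow the clamp guards against is easy to see with numbers. On a 32-bit VM, size_t is 32 bits; an unclamped long array of max_jint elements would need about 2^31 * 8 = 2^34 bytes, which truncates when cast to size_t. The new unit test asserts the round-trip for every type; a standalone rendering of one case, mirroring check_max_length_overflow():)

julong length            = (julong) arrayOopDesc::max_array_length(T_LONG);
julong bytes_per_element = 8; // type2aelembytes(T_LONG)
julong bytes = length * bytes_per_element
             + arrayOopDesc::header_size_in_bytes();
assert((julong)(size_t)bytes == bytes, "size_t overflow for long array");
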
--- a/src/share/vm/oops/constantPoolKlass.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/oops/constantPoolKlass.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -532,7 +532,7 @@
     if (cp->tag_at(i).is_unresolved_klass()) {
       // This will force loading of the class
       klassOop klass = cp->klass_at(i, CHECK);
-      if (klass->is_instance()) {
+      if (klass->klass_part()->oop_is_instance()) {
         // Force initialization of class
         instanceKlass::cast(klass)->initialize(CHECK);
       }
--- a/src/share/vm/opto/addnode.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/addnode.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -34,8 +34,6 @@
 
 // Portions of code courtesy of Clifford Click
 
-#define MAXFLOAT        ((float)3.40282346638528860e+38)
-
 // Classic Add functionality.  This covers all the usual 'add' behaviors for
 // an algebraic ring.  Add-integer, add-float, add-double, and binary-or are
 // all inherited from this class.  The various identity values are supplied
--- a/src/share/vm/opto/c2_globals.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/c2_globals.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -201,7 +201,7 @@
   diagnostic(bool, UnrollLimitCheck, true,                                  \
           "Additional overflow checks during loop unroll")                  \
                                                                             \
-  product(bool, OptimizeFill, false,                                        \
+  product(bool, OptimizeFill, true,                                         \
           "convert fill/copy loops into intrinsic")                         \
                                                                             \
   develop(bool, TraceOptimizeFill, false,                                   \
@@ -459,7 +459,7 @@
   product(bool, UseOptoBiasInlining, true,                                  \
           "Generate biased locking code in C2 ideal graph")                 \
                                                                             \
-  product(bool, OptimizeStringConcat, false,                                \
+  product(bool, OptimizeStringConcat, true,                                 \
           "Optimize the construction of Strings by StringBuilder")          \
                                                                             \
   notproduct(bool, PrintOptimizeStringConcat, false,                        \
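
(Aside: both are product flags, so the previous behavior remains reachable from the command line if the new defaults cause trouble, e.g.:

  java -XX:-OptimizeFill -XX:-OptimizeStringConcat ...)
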
--- a/src/share/vm/opto/callGenerator.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/callGenerator.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -775,15 +775,15 @@
 
   Node* bol = NULL;
   int bc = jvms->method()->java_code_at_bci(jvms->bci());
-  if (bc == Bytecodes::_invokespecial) {
-    // This is the selectAlternative idiom for guardWithTest
+  if (bc != Bytecodes::_invokedynamic) {
+    // This is the selectAlternative idiom for guardWithTest or
+    // a similar construct.
     Node* receiver = kit.argument(0);
 
     // Check if the MethodHandle is the expected one
     Node* cmp = gvn.transform(new(kit.C, 3) CmpPNode(receiver, predicted_mh));
     bol = gvn.transform(new(kit.C, 2) BoolNode(cmp, BoolTest::eq) );
   } else {
-    assert(bc == Bytecodes::_invokedynamic, "must be");
     // Get the constant pool cache from the caller class.
     ciMethod* caller_method = jvms->method();
     ciBytecodeStream str(caller_method);
--- a/src/share/vm/opto/compile.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/compile.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -346,15 +346,15 @@
 // Disconnect all useless nodes by disconnecting those at the boundary.
 void Compile::remove_useless_nodes(Unique_Node_List &useful) {
   uint next = 0;
-  while( next < useful.size() ) {
+  while (next < useful.size()) {
     Node *n = useful.at(next++);
     // Use raw traversal of out edges since this code removes out edges
     int max = n->outcnt();
-    for (int j = 0; j < max; ++j ) {
+    for (int j = 0; j < max; ++j) {
       Node* child = n->raw_out(j);
-      if( ! useful.member(child) ) {
-        assert( !child->is_top() || child != top(),
-                "If top is cached in Compile object it is in useful list");
+      if (!useful.member(child)) {
+        assert(!child->is_top() || child != top(),
+               "If top is cached in Compile object it is in useful list");
         // Only need to remove this out-edge to the useless node
         n->raw_del_out(j);
         --j;
@@ -362,7 +362,14 @@
       }
     }
     if (n->outcnt() == 1 && n->has_special_unique_user()) {
-      record_for_igvn( n->unique_out() );
+      record_for_igvn(n->unique_out());
+    }
+  }
+  // Remove useless macro and predicate Opaque nodes
+  for (int i = C->macro_count()-1; i >= 0; i--) {
+    Node* n = C->macro_node(i);
+    if (!useful.member(n)) {
+      remove_macro_node(n);
     }
   }
   debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
@@ -719,6 +726,7 @@
       while (_late_inlines.length() > 0) {
         CallGenerator* cg = _late_inlines.pop();
         cg->do_late_inline();
+        if (failing())  return;
       }
     }
     assert(_late_inlines.length() == 0, "should have been processed");
@@ -1691,13 +1699,20 @@
 
   // Perform escape analysis
   if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
+    if (has_loops()) {
+      // Cleanup graph (remove dead nodes).
+      TracePhase t2("idealLoop", &_t_idealLoop, true);
+      PhaseIdealLoop ideal_loop( igvn, false, true );
+      if (major_progress()) print_method("PhaseIdealLoop before EA", 2);
+      if (failing())  return;
+    }
     TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, true);
     ConnectionGraph::do_analysis(this, &igvn);
 
     if (failing())  return;
 
     igvn.optimize();
-    print_method("Iter GVN 3", 2);
+    print_method("Iter GVN after EA", 2);
 
     if (failing())  return;
 
--- a/src/share/vm/opto/escape.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/escape.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -108,14 +108,16 @@
   // Add ConP(#NULL) and ConN(#NULL) nodes.
   Node* oop_null = igvn->zerocon(T_OBJECT);
   _oop_null = oop_null->_idx;
-  assert(_oop_null < C->unique(), "should be created already");
+  assert(_oop_null < nodes_size(), "should be created already");
   add_node(oop_null, PointsToNode::JavaObject, PointsToNode::NoEscape, true);
 
   if (UseCompressedOops) {
     Node* noop_null = igvn->zerocon(T_NARROWOOP);
     _noop_null = noop_null->_idx;
-    assert(_noop_null < C->unique(), "should be created already");
+    assert(_noop_null < nodes_size(), "should be created already");
     add_node(noop_null, PointsToNode::JavaObject, PointsToNode::NoEscape, true);
+  } else {
+    _noop_null = _oop_null; // Should be initialized
   }
 }
 
@@ -174,6 +176,9 @@
 }
 
 void ConnectionGraph::set_escape_state(uint ni, PointsToNode::EscapeState es) {
+  // Don't change the non-escaping state of the NULL pointer.
+  if (ni == _noop_null || ni == _oop_null)
+    return;
   PointsToNode *npt = ptnode_adr(ni);
   PointsToNode::EscapeState old_es = npt->escape_state();
   if (es > old_es)
@@ -231,8 +236,8 @@
   }
   if (orig_es != es) {
     // cache the computed escape state
-    assert(es != PointsToNode::UnknownEscape, "should have computed an escape state");
-    ptnode_adr(idx)->set_escape_state(es);
+    assert(es > orig_es, "should have computed an escape state");
+    set_escape_state(idx, es);
   } // orig_es could be PointsToNode::UnknownEscape
   return es;
 }
@@ -334,7 +339,7 @@
         add_pointsto_edge(ni, etgt);
         if(etgt == _phantom_object) {
           // Special case - field set outside (globally escaping).
-          ptn->set_escape_state(PointsToNode::GlobalEscape);
+          set_escape_state(ni, PointsToNode::GlobalEscape);
         }
       } else if (et == PointsToNode::DeferredEdge) {
         deferred_edges->append(etgt);
@@ -373,16 +378,17 @@
 // whose offset matches "offset".
 void ConnectionGraph::add_deferred_edge_to_fields(uint from_i, uint adr_i, int offs) {
   PointsToNode* an = ptnode_adr(adr_i);
+  bool is_alloc = an->_node->is_Allocate();
   for (uint fe = 0; fe < an->edge_count(); fe++) {
     assert(an->edge_type(fe) == PointsToNode::FieldEdge, "expecting a field edge");
     int fi = an->edge_target(fe);
     PointsToNode* pf = ptnode_adr(fi);
-    int po = pf->offset();
-    if (pf->edge_count() == 0) {
-      // we have not seen any stores to this field, assume it was set outside this method
+    int offset = pf->offset();
+    if (!is_alloc) {
+      // Assume the field was set outside this method if the base is not an Allocate node
       add_pointsto_edge(fi, _phantom_object);
     }
-    if (po == offs || po == Type::OffsetBot || offs == Type::OffsetBot) {
+    if (offset == offs || offset == Type::OffsetBot || offs == Type::OffsetBot) {
       add_deferred_edge(from_i, fi);
     }
   }
@@ -1036,7 +1042,7 @@
       PointsToNode::EscapeState es = escape_state(alloc);
       // We have an allocation or call which returns a Java object,
       // see if it is unescaped.
-      if (es != PointsToNode::NoEscape || !ptn->_scalar_replaceable)
+      if (es != PointsToNode::NoEscape || !ptn->scalar_replaceable())
         continue;
 
       // Find CheckCastPP for the allocate or for the return value of a call
@@ -1085,7 +1091,7 @@
         // so it could be eliminated.
         alloc->as_Allocate()->_is_scalar_replaceable = true;
       }
-      set_escape_state(n->_idx, es);
+      set_escape_state(n->_idx, es); // CheckCastPP escape state
       // in order for an object to be scalar-replaceable, it must be:
       //   - a direct allocation (not a call returning an object)
       //   - non-escaping
@@ -1097,15 +1103,14 @@
       set_map(n->_idx, alloc);
       const TypeOopPtr *t = igvn->type(n)->isa_oopptr();
       if (t == NULL)
-        continue;  // not a TypeInstPtr
+        continue;  // not a TypeOopPtr
       tinst = t->cast_to_exactness(true)->is_oopptr()->cast_to_instance_id(ni);
       igvn->hash_delete(n);
       igvn->set_type(n,  tinst);
       n->raise_bottom_type(tinst);
       igvn->hash_insert(n);
       record_for_optimizer(n);
-      if (alloc->is_Allocate() && ptn->_scalar_replaceable &&
-          (t->isa_instptr() || t->isa_aryptr())) {
+      if (alloc->is_Allocate() && (t->isa_instptr() || t->isa_aryptr())) {
 
         // First, put on the worklist all Field edges from Connection Graph
         // which is more accurate than putting immediate users from Ideal Graph.
@@ -1533,7 +1538,8 @@
     worklist_init.push(C->root());
   }
 
-  GrowableArray<int> cg_worklist;
+  GrowableArray<Node*> alloc_worklist;
+  GrowableArray<Node*> addp_worklist;
   PhaseGVN* igvn = _igvn;
   bool has_allocations = false;
 
@@ -1546,11 +1552,13 @@
     if (n->is_Allocate() || n->is_CallStaticJava() &&
         ptnode_adr(n->_idx)->node_type() == PointsToNode::JavaObject) {
       has_allocations = true;
+      if (n->is_Allocate())
+        alloc_worklist.append(n);
     }
     if(n->is_AddP()) {
       // Collect address nodes. Use them during stage 3 below
       // to build initial connection graph field edges.
-      cg_worklist.append(n->_idx);
+      addp_worklist.append(n);
     } else if (n->is_MergeMem()) {
       // Collect all MergeMem nodes to add memory slices for
       // scalar replaceable objects in split_unique_types().
@@ -1576,10 +1584,9 @@
 
   // 3. Pass to create initial fields edges (JavaObject -F-> AddP)
   //    to reduce number of iterations during stage 4 below.
-  uint cg_length = cg_worklist.length();
-  for( uint next = 0; next < cg_length; ++next ) {
-    int ni = cg_worklist.at(next);
-    Node* n = ptnode_adr(ni)->_node;
+  uint addp_length = addp_worklist.length();
+  for( uint next = 0; next < addp_length; ++next ) {
+    Node* n = addp_worklist.at(next);
     Node* base = get_addp_base(n);
     if (base->is_Proj())
       base = base->in(0);
@@ -1589,7 +1596,7 @@
     }
   }
 
-  cg_worklist.clear();
+  GrowableArray<int> cg_worklist;
   cg_worklist.append(_phantom_object);
   GrowableArray<uint>  worklist;
 
@@ -1648,73 +1655,44 @@
 
   Arena* arena = Thread::current()->resource_area();
   VectorSet visited(arena);
+
+  // 5. Find fields initializing values for not escaped allocations
+  uint alloc_length = alloc_worklist.length();
+  for (uint next = 0; next < alloc_length; ++next) {
+    Node* n = alloc_worklist.at(next);
+    if (ptnode_adr(n->_idx)->escape_state() == PointsToNode::NoEscape) {
+      find_init_values(n, &visited, igvn);
+    }
+  }
+
   worklist.clear();
 
-  // 5. Remove deferred edges from the graph and adjust
-  //    escape state of nonescaping objects.
-  cg_length = cg_worklist.length();
-  for( uint next = 0; next < cg_length; ++next ) {
+  // 6. Remove deferred edges from the graph.
+  uint cg_length = cg_worklist.length();
+  for (uint next = 0; next < cg_length; ++next) {
     int ni = cg_worklist.at(next);
     PointsToNode* ptn = ptnode_adr(ni);
     PointsToNode::NodeType nt = ptn->node_type();
     if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) {
       remove_deferred(ni, &worklist, &visited);
       Node *n = ptn->_node;
-      if (n->is_AddP()) {
-        // Search for objects which are not scalar replaceable
-        // and adjust their escape state.
-        adjust_escape_state(ni, igvn);
-      }
     }
   }
 
-  // 6. Propagate escape states.
-  worklist.clear();
-  bool has_non_escaping_obj = false;
-
-  // push all GlobalEscape nodes on the worklist
-  for( uint next = 0; next < cg_length; ++next ) {
-    int nk = cg_worklist.at(next);
-    if (ptnode_adr(nk)->escape_state() == PointsToNode::GlobalEscape)
-      worklist.push(nk);
-  }
-  // mark all nodes reachable from GlobalEscape nodes
-  while(worklist.length() > 0) {
-    PointsToNode* ptn = ptnode_adr(worklist.pop());
-    uint e_cnt = ptn->edge_count();
-    for (uint ei = 0; ei < e_cnt; ei++) {
-      uint npi = ptn->edge_target(ei);
-      PointsToNode *np = ptnode_adr(npi);
-      if (np->escape_state() < PointsToNode::GlobalEscape) {
-        np->set_escape_state(PointsToNode::GlobalEscape);
-        worklist.push(npi);
-      }
-    }
+  // 7. Adjust escape state of nonescaping objects.
+  for (uint next = 0; next < addp_length; ++next) {
+    Node* n = addp_worklist.at(next);
+    adjust_escape_state(n);
   }
 
-  // push all ArgEscape nodes on the worklist
-  for( uint next = 0; next < cg_length; ++next ) {
-    int nk = cg_worklist.at(next);
-    if (ptnode_adr(nk)->escape_state() == PointsToNode::ArgEscape)
-      worklist.push(nk);
-  }
+  // 8. Propagate escape states.
+  worklist.clear();
+
+  // mark all nodes reachable from GlobalEscape nodes
+  (void)propagate_escape_state(&cg_worklist, &worklist, PointsToNode::GlobalEscape);
+
   // mark all nodes reachable from ArgEscape nodes
-  while(worklist.length() > 0) {
-    PointsToNode* ptn = ptnode_adr(worklist.pop());
-    if (ptn->node_type() == PointsToNode::JavaObject)
-      has_non_escaping_obj = true; // Non GlobalEscape
-    uint e_cnt = ptn->edge_count();
-    for (uint ei = 0; ei < e_cnt; ei++) {
-      uint npi = ptn->edge_target(ei);
-      PointsToNode *np = ptnode_adr(npi);
-      if (np->escape_state() < PointsToNode::ArgEscape) {
-        np->set_escape_state(PointsToNode::ArgEscape);
-        worklist.push(npi);
-      }
-    }
-  }
-
-  GrowableArray<Node*> alloc_worklist;
+  bool has_non_escaping_obj = propagate_escape_state(&cg_worklist, &worklist, PointsToNode::ArgEscape);
 
   // push all NoEscape nodes on the worklist
   for( uint next = 0; next < cg_length; ++next ) {
@@ -1722,15 +1700,20 @@
     if (ptnode_adr(nk)->escape_state() == PointsToNode::NoEscape)
       worklist.push(nk);
   }
+  alloc_worklist.clear();
   // mark all nodes reachable from NoEscape nodes
   while(worklist.length() > 0) {
-    PointsToNode* ptn = ptnode_adr(worklist.pop());
-    if (ptn->node_type() == PointsToNode::JavaObject)
-      has_non_escaping_obj = true; // Non GlobalEscape
+    uint nk = worklist.pop();
+    PointsToNode* ptn = ptnode_adr(nk);
+    if (ptn->node_type() == PointsToNode::JavaObject &&
+        !(nk == _noop_null || nk == _oop_null))
+      has_non_escaping_obj = true; // Non Escape
     Node* n = ptn->_node;
-    if (n->is_Allocate() && ptn->_scalar_replaceable ) {
+    bool scalar_replaceable = ptn->scalar_replaceable();
+    if (n->is_Allocate() && scalar_replaceable) {
       // Push scalar replaceable allocations on alloc_worklist
-      // for processing in split_unique_types().
+      // for processing in split_unique_types(). Note that the
+      // following code may change the scalar_replaceable value.
       alloc_worklist.append(n);
     }
     uint e_cnt = ptn->edge_count();
@@ -1738,7 +1721,14 @@
       uint npi = ptn->edge_target(ei);
       PointsToNode *np = ptnode_adr(npi);
       if (np->escape_state() < PointsToNode::NoEscape) {
-        np->set_escape_state(PointsToNode::NoEscape);
+        set_escape_state(npi, PointsToNode::NoEscape);
+        if (!scalar_replaceable) {
+          np->set_scalar_replaceable(false);
+        }
+        worklist.push(npi);
+      } else if (np->scalar_replaceable() && !scalar_replaceable) {
+        // Propagate scalar_replaceable value.
+        np->set_scalar_replaceable(false);
         worklist.push(npi);
       }
     }
@@ -1747,7 +1737,12 @@
   _collecting = false;
   assert(C->unique() == nodes_size(), "there should be no new ideal nodes during ConnectionGraph build");
 
-  if (EliminateLocks) {
+  assert(ptnode_adr(_oop_null)->escape_state() == PointsToNode::NoEscape, "sanity");
+  if (UseCompressedOops) {
+    assert(ptnode_adr(_noop_null)->escape_state() == PointsToNode::NoEscape, "sanity");
+  }
+
+  if (EliminateLocks && has_non_escaping_obj) {
     // Mark locks before changing ideal graph.
     int cnt = C->macro_count();
     for( int i=0; i < cnt; i++ ) {
@@ -1772,7 +1767,18 @@
   }
 #endif
 
-  bool has_scalar_replaceable_candidates = alloc_worklist.length() > 0;
+  bool has_scalar_replaceable_candidates = false;
+  alloc_length = alloc_worklist.length();
+  for (uint next = 0; next < alloc_length; ++next) {
+    Node* n = alloc_worklist.at(next);
+    PointsToNode* ptn = ptnode_adr(n->_idx);
+    assert(ptn->escape_state() == PointsToNode::NoEscape, "sanity");
+    if (ptn->scalar_replaceable()) {
+      has_scalar_replaceable_candidates = true;
+      break;
+    }
+  }
+
   if ( has_scalar_replaceable_candidates &&
        C->AliasLevel() >= 3 && EliminateAllocations ) {
 
@@ -1801,53 +1807,32 @@
   return has_non_escaping_obj;
 }
 
-// Adjust escape state after Connection Graph is built.
-void ConnectionGraph::adjust_escape_state(int nidx, PhaseTransform* phase) {
-  PointsToNode* ptn = ptnode_adr(nidx);
-  Node* n = ptn->_node;
-  assert(n->is_AddP(), "Should be called for AddP nodes only");
-  // Search for objects which are not scalar replaceable.
-  // Mark their escape state as ArgEscape to propagate the state
-  // to referenced objects.
-  // Note: currently there are no difference in compiler optimizations
-  // for ArgEscape objects and NoEscape objects which are not
-  // scalar replaceable.
+// Find fields initializing values for allocations.
+void ConnectionGraph::find_init_values(Node* alloc, VectorSet* visited, PhaseTransform* phase) {
+  assert(alloc->is_Allocate(), "Should be called for Allocate nodes only");
+  PointsToNode* pta = ptnode_adr(alloc->_idx);
+  assert(pta->escape_state() == PointsToNode::NoEscape, "Not escaped Allocate nodes only");
+  InitializeNode* ini = alloc->as_Allocate()->initialization();
 
   Compile* C = _compile;
-
-  int offset = ptn->offset();
-  Node* base = get_addp_base(n);
-  VectorSet* ptset = PointsTo(base);
-  int ptset_size = ptset->Size();
-
+  visited->Reset();
   // Check if an oop field's initializing value is recorded and add
   // a corresponding NULL field's value if it is not recorded.
   // Connection Graph does not record a default initialization by NULL
   // captured by Initialize node.
   //
-  // Note: it will disable scalar replacement in some cases:
-  //
-  //    Point p[] = new Point[1];
-  //    p[0] = new Point(); // Will be not scalar replaced
-  //
-  // but it will save us from incorrect optimizations in next cases:
-  //
-  //    Point p[] = new Point[1];
-  //    if ( x ) p[0] = new Point(); // Will be not scalar replaced
-  //
-  // Do a simple control flow analysis to distinguish above cases.
-  //
-  if (offset != Type::OffsetBot && ptset_size == 1) {
-    uint elem = ptset->getelem(); // Allocation node's index
-    // It does not matter if it is not Allocation node since
-    // only non-escaping allocations are scalar replaced.
-    if (ptnode_adr(elem)->_node->is_Allocate() &&
-        ptnode_adr(elem)->escape_state() == PointsToNode::NoEscape) {
-      AllocateNode* alloc = ptnode_adr(elem)->_node->as_Allocate();
-      InitializeNode* ini = alloc->initialization();
+  uint ae_cnt = pta->edge_count();
+  for (uint ei = 0; ei < ae_cnt; ei++) {
+    uint nidx = pta->edge_target(ei); // Field (AddP)
+    PointsToNode* ptn = ptnode_adr(nidx);
+    assert(ptn->_node->is_AddP(), "Should be AddP nodes only");
+    int offset = ptn->offset();
+    if (offset != Type::OffsetBot &&
+        offset != oopDesc::klass_offset_in_bytes() &&
+        !visited->test_set(offset)) {
 
       // Check only oop fields.
-      const Type* adr_type = n->as_AddP()->bottom_type();
+      const Type* adr_type = ptn->_node->as_AddP()->bottom_type();
       BasicType basic_field_type = T_INT;
       if (adr_type->isa_instptr()) {
         ciField* field = C->alias_type(adr_type->isa_instptr())->field();
@@ -1857,12 +1842,20 @@
           // Ignore non field load (for example, klass load)
         }
       } else if (adr_type->isa_aryptr()) {
-        const Type* elemtype = adr_type->isa_aryptr()->elem();
-        basic_field_type = elemtype->array_element_basic_type();
+        if (offset != arrayOopDesc::length_offset_in_bytes()) {
+          const Type* elemtype = adr_type->isa_aryptr()->elem();
+          basic_field_type = elemtype->array_element_basic_type();
+        } else {
+          // Ignore array length load
+        }
+#ifdef ASSERT
       } else {
-        // Raw pointers are used for initializing stores so skip it.
+        // Raw pointers are used for initializing stores so skip it
+        // since it should be recorded already
+        Node* base = get_addp_base(ptn->_node);
         assert(adr_type->isa_rawptr() && base->is_Proj() &&
                (base->in(0) == alloc),"unexpected pointer type");
+#endif
       }
       if (basic_field_type == T_OBJECT ||
           basic_field_type == T_NARROWOOP ||
@@ -1877,18 +1870,33 @@
             // Check for a store which follows allocation without branches.
             // For example, a volatile field store is not collected
             // by Initialize node. TODO: it would be nice to use idom() here.
-            for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
-              store = n->fast_out(i);
-              if (store->is_Store() && store->in(0) != NULL) {
-                Node* ctrl = store->in(0);
-                while(!(ctrl == ini || ctrl == alloc || ctrl == NULL ||
-                        ctrl == C->root() || ctrl == C->top() || ctrl->is_Region() ||
-                        ctrl->is_IfTrue() || ctrl->is_IfFalse())) {
-                   ctrl = ctrl->in(0);
-                }
-                if (ctrl == ini || ctrl == alloc) {
-                  value = store->in(MemNode::ValueIn);
-                  break;
+            //
+            // Search all references to the same field which use different
+            // AddP nodes, for example, in the next case:
+            //
+            //    Point p[] = new Point[1];
+            //    if ( x ) { p[0] = new Point(); p[0].x = x; }
+            //    if ( p[0] != null ) { y = p[0].x; } // has CastPP
+            //
+            for (uint next = ei; (next < ae_cnt) && (value == NULL); next++) {
+              uint fpi = pta->edge_target(next); // Field (AddP)
+              PointsToNode *ptf = ptnode_adr(fpi);
+              if (ptf->offset() == offset) {
+                Node* nf = ptf->_node;
+                for (DUIterator_Fast imax, i = nf->fast_outs(imax); i < imax; i++) {
+                  store = nf->fast_out(i);
+                  if (store->is_Store() && store->in(0) != NULL) {
+                    Node* ctrl = store->in(0);
+                    while(!(ctrl == ini || ctrl == alloc || ctrl == NULL ||
+                            ctrl == C->root() || ctrl == C->top() || ctrl->is_Region() ||
+                            ctrl->is_IfTrue() || ctrl->is_IfFalse())) {
+                       ctrl = ctrl->in(0);
+                    }
+                    if (ctrl == ini || ctrl == alloc) {
+                      value = store->in(MemNode::ValueIn);
+                      break;
+                    }
+                  }
                 }
               }
             }
@@ -1897,21 +1905,35 @@
         if (value == NULL || value != ptnode_adr(value->_idx)->_node) {
           // A field's initializing value was not recorded. Add NULL.
           uint null_idx = UseCompressedOops ? _noop_null : _oop_null;
-          add_pointsto_edge(nidx, null_idx);
+          add_edge_from_fields(alloc->_idx, null_idx, offset);
         }
       }
     }
   }
+}
+
+// Adjust escape state after Connection Graph is built.
+void ConnectionGraph::adjust_escape_state(Node* n) {
+  PointsToNode* ptn = ptnode_adr(n->_idx);
+  assert(n->is_AddP(), "Should be called for AddP nodes only");
+  // Search for objects which are not scalar replaceable
+  // and mark them to propagate the state to referenced objects.
+  //
+
+  int offset = ptn->offset();
+  Node* base = get_addp_base(n);
+  VectorSet* ptset = PointsTo(base);
+  int ptset_size = ptset->Size();
 
   // An object is not scalar replaceable if the field which may point
   // to it has unknown offset (unknown element of an array of objects).
   //
+
   if (offset == Type::OffsetBot) {
     uint e_cnt = ptn->edge_count();
     for (uint ei = 0; ei < e_cnt; ei++) {
       uint npi = ptn->edge_target(ei);
-      set_escape_state(npi, PointsToNode::ArgEscape);
-      ptnode_adr(npi)->_scalar_replaceable = false;
+      ptnode_adr(npi)->set_scalar_replaceable(false);
     }
   }
 
@@ -1930,20 +1952,62 @@
   // to unknown field (unknown element for arrays, offset is OffsetBot).
   //
   // Or the address may point to more than one object. This may produce
-  // the false positive result (set scalar_replaceable to false)
+  // the false positive result (set not scalar replaceable)
   // since the flow-insensitive escape analysis can't separate
   // the case when stores overwrite the field's value from the case
   // when stores happened on different control branches.
   //
+  // Note: it will disable scalar replacement in some cases:
+  //
+  //    Point p[] = new Point[1];
+  //    p[0] = new Point(); // Will not be scalar replaced
+  //
+  // but it will save us from incorrect optimizations in next cases:
+  //
+  //    Point p[] = new Point[1];
+  //    if ( x ) p[0] = new Point(); // Will be not scalar replaced
+  //
   if (ptset_size > 1 || ptset_size != 0 &&
       (has_LoadStore || offset == Type::OffsetBot)) {
     for( VectorSetI j(ptset); j.test(); ++j ) {
-      set_escape_state(j.elem, PointsToNode::ArgEscape);
-      ptnode_adr(j.elem)->_scalar_replaceable = false;
+      ptnode_adr(j.elem)->set_scalar_replaceable(false);
     }
   }
 }
 
+// Propagate escape states to referenced nodes.
+bool ConnectionGraph::propagate_escape_state(GrowableArray<int>* cg_worklist,
+                                             GrowableArray<uint>* worklist,
+                                             PointsToNode::EscapeState esc_state) {
+  bool has_java_obj = false;
+
+  // Push all nodes with the same escape state on the worklist.
+  uint cg_length = cg_worklist->length();
+  for (uint next = 0; next < cg_length; ++next) {
+    int nk = cg_worklist->at(next);
+    if (ptnode_adr(nk)->escape_state() == esc_state)
+      worklist->push(nk);
+  }
+  // Mark all reachable nodes.
+  while (worklist->length() > 0) {
+    PointsToNode* ptn = ptnode_adr(worklist->pop());
+    if (ptn->node_type() == PointsToNode::JavaObject) {
+      has_java_obj = true;
+    }
+    uint e_cnt = ptn->edge_count();
+    for (uint ei = 0; ei < e_cnt; ei++) {
+      uint npi = ptn->edge_target(ei);
+      PointsToNode *np = ptnode_adr(npi);
+      if (np->escape_state() < esc_state) {
+        set_escape_state(npi, esc_state);
+        worklist->push(npi);
+      }
+    }
+  }
+  // Returns true if there are non-escaping Java objects.
+  return has_java_obj && (esc_state < PointsToNode::GlobalEscape);
+}
+
 void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *phase) {
 
     switch (call->Opcode()) {
@@ -2100,6 +2164,7 @@
       } else {
         es = PointsToNode::NoEscape;
         edge_to = call_idx;
+        assert(ptnode_adr(call_idx)->scalar_replaceable(), "sanity");
       }
       set_escape_state(call_idx, es);
       add_pointsto_edge(resproj_idx, edge_to);
@@ -2123,10 +2188,11 @@
       } else {
         es = PointsToNode::NoEscape;
         edge_to = call_idx;
+        assert(ptnode_adr(call_idx)->scalar_replaceable(), "sanity");
         int length = call->in(AllocateNode::ALength)->find_int_con(-1);
         if (length < 0 || length > EliminateAllocationArraySizeLimit) {
           // Not scalar replaceable if the length is not constant or too big.
-          ptnode_adr(call_idx)->_scalar_replaceable = false;
+          ptnode_adr(call_idx)->set_scalar_replaceable(false);
         }
       }
       set_escape_state(call_idx, es);
@@ -2168,11 +2234,12 @@
           // Mark it as NoEscape so that objects referenced by
           // its fields will be marked as NoEscape at least.
           set_escape_state(call_idx, PointsToNode::NoEscape);
+          ptnode_adr(call_idx)->set_scalar_replaceable(false);
           add_pointsto_edge(resproj_idx, call_idx);
           copy_dependencies = true;
         } else if (call_analyzer->is_return_local()) {
           // determine whether any arguments are returned
-          set_escape_state(call_idx, PointsToNode::NoEscape);
+          set_escape_state(call_idx, PointsToNode::ArgEscape);
           bool ret_arg = false;
           for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
             const Type* at = d->field_at(i);
@@ -2189,7 +2256,6 @@
                   add_pointsto_edge(resproj_idx, arg->_idx);
                 else
                   add_deferred_edge(resproj_idx, arg->_idx);
-                arg_esp->_hidden_alias = true;
               }
             }
           }
@@ -2198,18 +2264,12 @@
             set_escape_state(call_idx, PointsToNode::GlobalEscape);
             add_pointsto_edge(resproj_idx, _phantom_object);
           }
-          copy_dependencies = true;
+          if (done) {
+            copy_dependencies = true;
+          }
         } else {
           set_escape_state(call_idx, PointsToNode::GlobalEscape);
           add_pointsto_edge(resproj_idx, _phantom_object);
-          for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
-            const Type* at = d->field_at(i);
-            if (at->isa_oopptr() != NULL) {
-              Node *arg = call->in(i)->uncast();
-              PointsToNode *arg_esp = ptnode_adr(arg->_idx);
-              arg_esp->_hidden_alias = true;
-            }
-          }
         }
         if (copy_dependencies)
           call_analyzer->copy_dependencies(_compile->dependencies());
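
For context, a hedged Java sketch of the behavior the escape.cpp changes above govern (class and method names are invented for illustration; actual classification depends on inlining and the flags in effect). The first method allocates a NoEscape object that C2 may scalar replace; the second mirrors the Point example in the comments above, where a conditional store into an array element defeats scalar replacement for the element object:

    // EscapeDemo.java -- illustrative only, not part of this changeset.
    public class EscapeDemo {
        static class Point { int x, y; }

        static int noEscape(int v) {
            Point p = new Point();   // NoEscape: never leaves this method,
            p.x = v;                 // so it may be replaced with scalars.
            return p.x;
        }

        static int conditionalStore(boolean cond, int v) {
            Point[] p = new Point[1];
            if (cond) { p[0] = new Point(); p[0].x = v; } // conditional store:
            return (p[0] != null) ? p[0].x : 0;           // element not scalar replaced
        }
    }
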
--- a/src/share/vm/opto/escape.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/escape.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -74,7 +74,7 @@
 // C2 does not have local variables.  However for the purposes of constructing
 // the connection graph, the following IR nodes are treated as local variables:
 //     Phi    (pointer values)
-//     LoadP
+//     LoadP, LoadN
 //     Proj#5 (value returned from callnodes including allocations)
 //     CheckCastPP, CastPP
 //
@@ -84,7 +84,7 @@
 //
 // The following node types are JavaObject:
 //
-//     top()
+//     phantom_object (general globally escaped object)
 //     Allocate
 //     AllocateArray
 //     Parm  (for incoming arguments)
@@ -93,6 +93,7 @@
 //     ConP
 //     LoadKlass
 //     ThreadLocal
+//     CallStaticJava (which returns Object)
 //
 // AddP nodes are fields.
 //
@@ -130,10 +131,12 @@
 
   typedef enum {
     UnknownEscape = 0,
-    NoEscape      = 1, // A scalar replaceable object with unique type.
-    ArgEscape     = 2, // An object passed as argument or referenced by
-                       // argument (and not globally escape during call).
-    GlobalEscape  = 3  // An object escapes the method and thread.
+    NoEscape      = 1, // An object does not escape the method or thread and is
+                       // not passed to a call. It can be replaced with a scalar.
+    ArgEscape     = 2, // An object does not escape the method or thread but is
+                       // passed as an argument to a call or referenced by an
+                       // argument, and it does not escape during the call.
+    GlobalEscape  = 3  // An object escapes the method or thread.
   } EscapeState;
 
   typedef enum {
@@ -153,28 +156,25 @@
 
   NodeType             _type;
   EscapeState          _escape;
-  GrowableArray<uint>* _edges;   // outgoing edges
+  GrowableArray<uint>* _edges; // outgoing edges
+  Node* _node;                 // Ideal node corresponding to this PointsTo node.
+  int   _offset;               // Object field's offset.
+  bool  _scalar_replaceable;   // A non-escaped object may be replaced with a scalar.
 
 public:
-  Node* _node;              // Ideal node corresponding to this PointsTo node.
-  int   _offset;            // Object fields offsets.
-  bool  _scalar_replaceable;// Not escaped object could be replaced with scalar
-  bool  _hidden_alias;      // This node is an argument to a function.
-                            // which may return it creating a hidden alias.
-
   PointsToNode():
     _type(UnknownType),
     _escape(UnknownEscape),
     _edges(NULL),
     _node(NULL),
     _offset(-1),
-    _scalar_replaceable(true),
-    _hidden_alias(false) {}
+    _scalar_replaceable(true) {}
 
 
   EscapeState escape_state() const { return _escape; }
   NodeType node_type() const { return _type;}
   int offset() { return _offset;}
+  bool scalar_replaceable() { return _scalar_replaceable;}
 
   void set_offset(int offs) { _offset = offs;}
   void set_escape_state(EscapeState state) { _escape = state; }
@@ -182,6 +182,7 @@
     assert(_type == UnknownType || _type == ntype, "Can't change node type");
     _type = ntype;
   }
+  void set_scalar_replaceable(bool v) { _scalar_replaceable = v; }
 
   // count of outgoing edges
   uint edge_count() const { return (_edges == NULL) ? 0 : _edges->length(); }
@@ -233,8 +234,8 @@
                                        // that pointer values loaded from
                                        // a field which has not been set
                                        // are assumed to point to.
-  uint                      _oop_null; // ConP(#NULL)
-  uint                     _noop_null; // ConN(#NULL)
+  uint                      _oop_null; // ConP(#NULL)->_idx
+  uint                     _noop_null; // ConN(#NULL)->_idx
 
   Compile *                  _compile; // Compile object for current compilation
   PhaseIterGVN *                _igvn; // Value numbering
@@ -339,8 +340,16 @@
   // Set the escape state of a node
   void set_escape_state(uint ni, PointsToNode::EscapeState es);
 
+  // Find fields' initializing values for allocations.
+  void find_init_values(Node* n, VectorSet* visited, PhaseTransform* phase);
+
   // Adjust escape state after Connection Graph is built.
-  void adjust_escape_state(int nidx, PhaseTransform* phase);
+  void adjust_escape_state(Node* n);
+
+  // Propagate escape states to referenced nodes.
+  bool propagate_escape_state(GrowableArray<int>* cg_worklist,
+                              GrowableArray<uint>* worklist,
+                              PointsToNode::EscapeState esc_state);
 
   // Compute the escape information
   bool compute_escape();
@@ -357,21 +366,6 @@
   // escape state of a node
   PointsToNode::EscapeState escape_state(Node *n);
 
-  // other information we have collected
-  bool is_scalar_replaceable(Node *n) {
-    if (_collecting || (n->_idx >= nodes_size()))
-      return false;
-    PointsToNode* ptn = ptnode_adr(n->_idx);
-    return ptn->escape_state() == PointsToNode::NoEscape && ptn->_scalar_replaceable;
-  }
-
-  bool hidden_alias(Node *n) {
-    if (_collecting || (n->_idx >= nodes_size()))
-      return true;
-    PointsToNode* ptn = ptnode_adr(n->_idx);
-    return (ptn->escape_state() != PointsToNode::NoEscape) || ptn->_hidden_alias;
-  }
-
 #ifndef PRODUCT
   void dump();
 #endif
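
A hedged Java illustration of the three escape states documented in the revised enum comments above (names are invented; the real classification depends on inlining and on what the callee does with its argument):

    // EscapeStates.java -- illustrative only.
    public class EscapeStates {
        static Object sink; // store to a static field: a global escape route

        static int noEscape() {
            StringBuilder sb = new StringBuilder(); // NoEscape: stays local
            sb.append(42);
            return sb.length();
        }

        static boolean argEscape(java.util.List<Object> consumer) {
            Object o = new Object(); // ArgEscape: passed to a call but, if the
            return consumer.contains(o); // callee does not retain it, it does
        }                                // not escape the method or thread

        static void globalEscape() {
            sink = new Object();     // GlobalEscape: reachable from anywhere
        }
    }
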
--- a/src/share/vm/opto/loopnode.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/loopnode.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1883,7 +1883,7 @@
 //----------------------------build_and_optimize-------------------------------
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
 // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
-void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) {
+void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts) {
   ResourceMark rm;
 
   int old_progress = C->major_progress();
@@ -2072,6 +2072,16 @@
   }
 #endif
 
+  if (skip_loop_opts) {
+    // Clean up any modified bits.
+    _igvn.optimize();
+
+    if (C->log() != NULL) {
+      log_loop_tree(_ltree_root, _ltree_root, C->log());
+    }
+    return;
+  }
+
   if (ReassociateInvariants) {
     // Reassociate invariants and prep for split_thru_phi
     for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
--- a/src/share/vm/opto/loopnode.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/loopnode.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -747,11 +747,11 @@
     _dom_lca_tags(arena()), // Thread::resource_area
     _verify_me(NULL),
     _verify_only(true) {
-    build_and_optimize(false);
+    build_and_optimize(false, false);
   }
 
   // build the loop tree and perform any requested optimizations
-  void build_and_optimize(bool do_split_if);
+  void build_and_optimize(bool do_split_if, bool skip_loop_opts);
 
 public:
   // Dominators for the sea of nodes
@@ -762,13 +762,13 @@
   Node *dom_lca_internal( Node *n1, Node *n2 ) const;
 
   // Compute the Ideal Node to Loop mapping
-  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) :
+  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool skip_loop_opts = false) :
     PhaseTransform(Ideal_Loop),
     _igvn(igvn),
     _dom_lca_tags(arena()), // Thread::resource_area
     _verify_me(NULL),
     _verify_only(false) {
-    build_and_optimize(do_split_ifs);
+    build_and_optimize(do_split_ifs, skip_loop_opts);
   }
 
   // Verify that verify_me made the same decisions as a fresh run.
@@ -778,7 +778,7 @@
     _dom_lca_tags(arena()), // Thread::resource_area
     _verify_me(verify_me),
     _verify_only(false) {
-    build_and_optimize(false);
+    build_and_optimize(false, false);
   }
 
   // Build and verify the loop tree without modifying the graph.  This
--- a/src/share/vm/opto/loopopts.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/loopopts.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -28,6 +28,7 @@
 #include "opto/connode.hpp"
 #include "opto/divnode.hpp"
 #include "opto/loopnode.hpp"
+#include "opto/matcher.hpp"
 #include "opto/mulnode.hpp"
 #include "opto/rootnode.hpp"
 #include "opto/subnode.hpp"
@@ -472,46 +473,50 @@
 // 1 or 2 items with a total of 1 or 2 ops executed speculatively.
 Node *PhaseIdealLoop::conditional_move( Node *region ) {
 
-  assert( region->is_Region(), "sanity check" );
-  if( region->req() != 3 ) return NULL;
+  assert(region->is_Region(), "sanity check");
+  if (region->req() != 3) return NULL;
 
   // Check for CFG diamond
   Node *lp = region->in(1);
   Node *rp = region->in(2);
-  if( !lp || !rp ) return NULL;
+  if (!lp || !rp) return NULL;
   Node *lp_c = lp->in(0);
-  if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL;
+  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
   IfNode *iff = lp_c->as_If();
 
-  // Check for highly predictable branch.  No point in CMOV'ing if
-  // we are going to predict accurately all the time.
-  // %%% This hides patterns produced by utility methods like Math.min.
-  if( iff->_prob < PROB_UNLIKELY_MAG(3) ||
-      iff->_prob > PROB_LIKELY_MAG(3) )
-    return NULL;
-
   // Check for ops pinned in an arm of the diamond.
   // Can't remove the control flow in this case
-  if( lp->outcnt() > 1 ) return NULL;
-  if( rp->outcnt() > 1 ) return NULL;
+  if (lp->outcnt() > 1) return NULL;
+  if (rp->outcnt() > 1) return NULL;
+
+  IdealLoopTree* r_loop = get_loop(region);
+  assert(r_loop == get_loop(iff), "sanity");
+  // Always convert to CMOVE if all results are used only outside this loop.
+  bool used_inside_loop = (r_loop == _ltree_root);
 
   // Check profitability
   int cost = 0;
   int phis = 0;
   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
     Node *out = region->fast_out(i);
-    if( !out->is_Phi() ) continue; // Ignore other control edges, etc
+    if (!out->is_Phi()) continue; // Ignore other control edges, etc
     phis++;
     PhiNode* phi = out->as_Phi();
-    switch (phi->type()->basic_type()) {
-    case T_LONG:
-      cost++;                   // Probably encodes as 2 CMOV's
+    BasicType bt = phi->type()->basic_type();
+    switch (bt) {
+    case T_FLOAT:
+    case T_DOUBLE: {
+      cost += Matcher::float_cmove_cost(); // Could be very expensive
+      break;
+    }
+    case T_LONG: {
+      cost += Matcher::long_cmove_cost(); // May encode as 2 CMOVs (falls through to add the base cost)
+    }
     case T_INT:                 // These all CMOV fine
-    case T_FLOAT:
-    case T_DOUBLE:
-    case T_ADDRESS:             // (RawPtr)
+    case T_ADDRESS: {           // (RawPtr)
       cost++;
       break;
+    }
     case T_NARROWOOP: // Fall through
     case T_OBJECT: {            // Base oops are OK, but not derived oops
       const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
@@ -524,7 +529,7 @@
       // relevant bases.  This puts the allocator in the business of
       // manufacturing expensive instructions, generally a bad plan.
       // Just Say No to Conditionally-Moved Derived Pointers.
-      if( tp && tp->offset() != 0 )
+      if (tp && tp->offset() != 0)
         return NULL;
       cost++;
       break;
@@ -533,39 +538,64 @@
       return NULL;              // In particular, can't do memory or I/O
     }
     // Add in cost any speculative ops
-    for( uint j = 1; j < region->req(); j++ ) {
+    for (uint j = 1; j < region->req(); j++) {
       Node *proj = region->in(j);
       Node *inp = phi->in(j);
       if (get_ctrl(inp) == proj) { // Found local op
         cost++;
         // Check for a chain of dependent ops; these will all become
         // speculative in a CMOV.
-        for( uint k = 1; k < inp->req(); k++ )
+        for (uint k = 1; k < inp->req(); k++)
           if (get_ctrl(inp->in(k)) == proj)
-            return NULL;        // Too much speculative goo
+            cost += ConditionalMoveLimit; // Too much speculative goo
       }
     }
     // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
     // This will likely Split-If, a higher-payoff operation.
     for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
       Node* use = phi->fast_out(k);
-      if( use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP() )
-        return NULL;
+      if (use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP())
+        cost += ConditionalMoveLimit;
+      // Is there a use inside the loop?
+      // Note: check only basic types since CMoveP is pinned.
+      if (!used_inside_loop && is_java_primitive(bt)) {
+        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
+        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
+          used_inside_loop = true;
+        }
+      }
     }
   }
-  if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo
   Node* bol = iff->in(1);
-  assert( bol->Opcode() == Op_Bool, "" );
+  assert(bol->Opcode() == Op_Bool, "");
   int cmp_op = bol->in(1)->Opcode();
   // It is expensive to generate flags from a float compare.
   // Avoid duplicated float compare.
-  if( phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
+  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
+
+  float infrequent_prob = PROB_UNLIKELY_MAG(3);
+  // Ignore cost and block frequency if the CMOVE can be moved outside the loop.
+  if (used_inside_loop) {
+    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
+
+    // The BlockLayoutByFrequency optimization moves an infrequent branch
+    // off the hot path. No point in CMOV'ing in such a case (110 is used
+    // instead of 100 to account for the inexactness of the float value).
+    if (BlockLayoutByFrequency) {
+      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
+    }
+  }
+  // Check for highly predictable branch.  No point in CMOV'ing if
+  // we are going to predict accurately all the time.
+  if (iff->_prob < infrequent_prob ||
+      iff->_prob > (1.0f - infrequent_prob))
+    return NULL;
 
   // --------------
   // Now replace all Phis with CMOV's
   Node *cmov_ctrl = iff->in(0);
   uint flip = (lp->Opcode() == Op_IfTrue);
-  while( 1 ) {
+  while (1) {
     PhiNode* phi = NULL;
     for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
       Node *out = region->fast_out(i);
@@ -576,15 +606,15 @@
     }
     if (phi == NULL)  break;
 #ifndef PRODUCT
-    if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV");
+    if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
 #endif
     // Move speculative ops
-    for( uint j = 1; j < region->req(); j++ ) {
+    for (uint j = 1; j < region->req(); j++) {
       Node *proj = region->in(j);
       Node *inp = phi->in(j);
       if (get_ctrl(inp) == proj) { // Found local op
 #ifndef PRODUCT
-        if( PrintOpto && VerifyLoopOptimizations ) {
+        if (PrintOpto && VerifyLoopOptimizations) {
           tty->print("  speculate: ");
           inp->dump();
         }
@@ -596,7 +626,15 @@
     register_new_node( cmov, cmov_ctrl );
     _igvn.replace_node( phi, cmov );
 #ifndef PRODUCT
-    if( VerifyLoopOptimizations ) verify();
+    if (TraceLoopOpts) {
+      tty->print("CMOV  ");
+      r_loop->dump_head();
+      if (Verbose) {
+        bol->in(1)->dump(1);
+        cmov->dump(1);
+      }
+    }
+    if (VerifyLoopOptimizations) verify();
 #endif
   }
 
@@ -676,14 +714,14 @@
 
   // Split 'n' through the merge point if it is profitable
   Node *phi = split_thru_phi( n, n_blk, policy );
-  if( !phi ) return n;
+  if (!phi) return n;
 
   // Found a Phi to split thru!
   // Replace 'n' with the new phi
   _igvn.replace_node( n, phi );
   // Moved a load around the loop, 'en-registering' something.
-  if( n_blk->Opcode() == Op_Loop && n->is_Load() &&
-      !phi->in(LoopNode::LoopBackControl)->is_Load() )
+  if (n_blk->is_Loop() && n->is_Load() &&
+      !phi->in(LoopNode::LoopBackControl)->is_Load())
     C->set_major_progress();
 
   return phi;
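
For orientation, a small Java sketch (illustrative only) of the branch shape conditional_move targets. The Phi merging max below may be converted to a CMOV; under the revised heuristic the conversion is also considered when the result is used only outside the loop, and the cost model above now prices long/float/double moves via the new Matcher hooks:

    // CMovDemo.java -- illustrative only.
    public class CMovDemo {
        static int maxOf(int[] a) {
            int max = Integer.MIN_VALUE;
            for (int v : a) {
                if (v > max) {  // data-dependent branch: Phi may become CMOV
                    max = v;
                }
            }
            return max;         // result used only outside the loop
        }
    }
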
--- a/src/share/vm/opto/machnode.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/machnode.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -484,6 +484,13 @@
   // Bind the offset lazily.
   if (offset == -1) {
     Compile::ConstantTable& constant_table = Compile::current()->constant_table();
+    // If called from Compile::scratch_emit_size, assume the worst case
+    // for load offsets: half the constant table size.
+    // NOTE: Don't return or calculate the actual offset (which might
+    // be zero) because that leads to problems with e.g. jumpXtnd on
+    // some architectures (cf. add-optimization in SPARC jumpXtnd).
+    if (Compile::current()->in_scratch_emit_size())
+      return constant_table.size() / 2;
     offset = constant_table.table_base_offset() + constant_table.find_offset(_constant);
     _constant.set_offset(offset);
   }
--- a/src/share/vm/opto/matcher.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/matcher.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1915,7 +1915,7 @@
         set_dontcare(n);
         break;
       case Op_Jump:
-        mstack.push(n->in(1), Visit);         // Switch Value
+        mstack.push(n->in(1), Pre_Visit);     // Switch Value (could be shared)
         mstack.push(n->in(0), Pre_Visit);     // Visit Control input
         continue;                             // while (mstack.is_nonempty())
       case Op_StrComp:
--- a/src/share/vm/opto/matcher.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/matcher.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -360,6 +360,12 @@
   // Anything this size or smaller may get converted to discrete scalar stores.
   static const int init_array_short_size;
 
+  // Some hardware needs 2 CMOV's for longs.
+  static const int long_cmove_cost();
+
+  // Some hardware has expensive CMOV for float and double.
+  static const int float_cmove_cost();
+
   // Should the Matcher clone shifts on addressing modes, expecting them to
   // be subsumed into complex addressing expressions or compute them into
   // registers?  True for Intel but false for most RISCs
--- a/src/share/vm/opto/memnode.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/memnode.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1421,6 +1421,12 @@
     const TypeOopPtr *t_oop = addr_t->isa_oopptr();
     if (can_reshape && opt_mem->is_Phi() &&
         (t_oop != NULL) && t_oop->is_known_instance_field()) {
+      PhaseIterGVN *igvn = phase->is_IterGVN();
+      if (igvn != NULL && igvn->_worklist.member(opt_mem)) {
+        // Delay this transformation until memory Phi is processed.
+        phase->is_IterGVN()->_worklist.push(this);
+        return NULL;
+      }
       // Split instance field load through Phi.
       Node* result = split_through_phi(phase);
       if (result != NULL) return result;
--- a/src/share/vm/opto/phaseX.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/phaseX.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -322,11 +322,12 @@
 void NodeHash::dump() {
   _total_inserts       += _inserts;
   _total_insert_probes += _insert_probes;
-  if( PrintCompilation && PrintOptoStatistics && Verbose && (_inserts > 0) ) { // PrintOptoGVN
-    if( PrintCompilation2 ) {
-      for( uint i=0; i<_max; i++ )
-      if( _table[i] )
-        tty->print("%d/%d/%d ",i,_table[i]->hash()&(_max-1),_table[i]->_idx);
+  if (PrintCompilation && PrintOptoStatistics && Verbose && (_inserts > 0)) {
+    if (WizardMode) {
+      for (uint i=0; i<_max; i++) {
+        if (_table[i])
+          tty->print("%d/%d/%d ",i,_table[i]->hash()&(_max-1),_table[i]->_idx);
+      }
     }
     tty->print("\nGVN Hash stats:  %d grows to %d max_size\n", _grows, _max);
     tty->print("  %d/%d (%8.1f%% full)\n", _inserts, _max, (double)_inserts/_max*100.0);
--- a/src/share/vm/opto/runtime.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/runtime.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -338,6 +338,24 @@
     // inform GC that we won't do card marks for initializing writes.
     new_store_pre_barrier(thread);
   }
+
+  oop result = thread->vm_result();
+  if ((len > 0) && (result != NULL) &&
+      is_deoptimized_caller_frame(thread)) {
+    // Zero the array here if the caller is deoptimized.
+    int size = ((typeArrayOop)result)->object_size();
+    BasicType elem_type = typeArrayKlass::cast(array_type)->element_type();
+    const size_t hs = arrayOopDesc::header_size(elem_type);
+    // Align to the next 8 bytes to avoid trashing the array's length.
+    const size_t aligned_hs = align_object_offset(hs);
+    HeapWord* obj = (HeapWord*)result;
+    if (aligned_hs > hs) {
+      Copy::zero_to_words(obj+hs, aligned_hs-hs);
+    }
+    // Optimized zeroing.
+    Copy::fill_to_aligned_words(obj+aligned_hs, size-aligned_hs);
+  }
+
 JRT_END
 
 // Note: multianewarray for one dimension is handled inline by GraphKit::new_array.
@@ -1130,12 +1148,22 @@
     assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
     frame caller_frame = stub_frame.sender(&reg_map);
 
-    // bypass VM_DeoptimizeFrame and deoptimize the frame directly
+    // Deoptimize the caller frame.
     Deoptimization::deoptimize_frame(thread, caller_frame.id());
   }
 }
 
 
+bool OptoRuntime::is_deoptimized_caller_frame(JavaThread *thread) {
+  // Called from within the owner thread, so no need for safepoint
+  RegisterMap reg_map(thread);
+  frame stub_frame = thread->last_frame();
+  assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
+  frame caller_frame = stub_frame.sender(&reg_map);
+  return caller_frame.is_deoptimized_frame();
+}
+
+
 const TypeFunc *OptoRuntime::register_finalizer_Type() {
   // create input type (domain)
   const Type **fields = TypeTuple::fields(1);
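
The Java-level invariant the new zeroing code above protects, as a minimal sketch (assuming the JIT elided the array's initializing stores and the caller frame is then deoptimized, the interpreter must still observe zeroed elements):

    // ZeroInitDemo.java -- illustrative only.
    public class ZeroInitDemo {
        static long checksum(int n) {
            int[] a = new int[n]; // Java guarantees zero-initialized elements
            long sum = 0;
            for (int v : a) {
                sum += v;
            }
            return sum;           // must be 0 for any n, even across deopt
        }
    }
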
--- a/src/share/vm/opto/runtime.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/opto/runtime.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -175,6 +175,7 @@
   static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
   static address rethrow_C                (oopDesc* exception, JavaThread *thread, address return_pc );
   static void deoptimize_caller_frame     (JavaThread *thread, bool doit);
+  static bool is_deoptimized_caller_frame (JavaThread *thread);
 
   // CodeBlob support
   // ===================================================================
--- a/src/share/vm/prims/jni.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/prims/jni.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -5042,7 +5042,8 @@
 void execute_internal_vm_tests() {
   if (ExecuteInternalVMTests) {
     assert(QuickSort::test_quick_sort(), "test_quick_sort failed");
-    tty->print_cr("All tests passed");
+    assert(arrayOopDesc::test_max_array_length(), "test_max_array_length failed");
+    tty->print_cr("All internal VM tests passed");
   }
 }
 
--- a/src/share/vm/prims/jniCheck.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/prims/jniCheck.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -107,7 +107,7 @@
     if (env != xenv) {                                                   \
       NativeReportJNIFatalError(thr, warn_wrong_jnienv);                 \
     }                                                                    \
-    __ENTRY(result_type, header, thr)
+    VM_ENTRY_BASE(result_type, header, thr)
 
 
 #define UNCHECKED() (unchecked_jni_NativeInterface)
--- a/src/share/vm/prims/jvmtiEnter.xsl	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/prims/jvmtiEnter.xsl	Fri Nov 18 15:15:13 2011 -0800
@@ -426,7 +426,7 @@
     <xsl:value-of select="$space"/>
     <xsl:text>ThreadInVMfromNative __tiv(current_thread);</xsl:text>
     <xsl:value-of select="$space"/>
-    <xsl:text>__ENTRY(jvmtiError, </xsl:text>
+    <xsl:text>VM_ENTRY_BASE(jvmtiError, </xsl:text>
     <xsl:apply-templates select="." mode="functionid"/>
     <xsl:text> , current_thread)</xsl:text>
     <xsl:value-of select="$space"/>
--- a/src/share/vm/prims/jvmtiEnv.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -173,7 +173,7 @@
     // from native so as to resolve the jthread.
 
     ThreadInVMfromNative __tiv(current_thread);
-    __ENTRY(jvmtiError, JvmtiEnv::GetThreadLocalStorage , current_thread)
+    VM_ENTRY_BASE(jvmtiError, JvmtiEnv::GetThreadLocalStorage , current_thread)
     debug_only(VMNativeEntryWrapper __vew;)
 
     oop thread_oop = JNIHandles::resolve_external_guard(thread);
--- a/src/share/vm/prims/jvmtiExport.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/prims/jvmtiExport.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -373,7 +373,7 @@
     JavaThread* current_thread = (JavaThread*) ThreadLocalStorage::thread();
     // transition code: native to VM
     ThreadInVMfromNative __tiv(current_thread);
-    __ENTRY(jvmtiEnv*, JvmtiExport::get_jvmti_interface, current_thread)
+    VM_ENTRY_BASE(jvmtiEnv*, JvmtiExport::get_jvmti_interface, current_thread)
     debug_only(VMNativeEntryWrapper __vew;)
 
     JvmtiEnv *jvmti_env = JvmtiEnv::create_a_jvmti(version);
--- a/src/share/vm/prims/methodHandles.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/prims/methodHandles.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -206,9 +206,12 @@
   _adapter_code = MethodHandlesAdapterBlob::create(adapter_code_size);
   if (_adapter_code == NULL)
     vm_exit_out_of_memory(adapter_code_size, "CodeCache: no room for MethodHandles adapters");
-  CodeBuffer code(_adapter_code);
-  MethodHandlesAdapterGenerator g(&code);
-  g.generate();
+  {
+    CodeBuffer code(_adapter_code);
+    MethodHandlesAdapterGenerator g(&code);
+    g.generate();
+    code.log_section_sizes("MethodHandlesAdapterBlob");
+  }
 }
 
 //------------------------------------------------------------------------------
@@ -3079,26 +3082,26 @@
 JVM_END
 
 JVM_ENTRY(void, MHN_setCallSiteTargetNormal(JNIEnv* env, jobject igcls, jobject call_site_jh, jobject target_jh)) {
-  oop call_site = JNIHandles::resolve_non_null(call_site_jh);
-  oop target    = JNIHandles::resolve(target_jh);
+  Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh));
+  Handle target   (THREAD, JNIHandles::resolve(target_jh));
   {
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
     Universe::flush_dependents_on(call_site, target);
   }
-  java_lang_invoke_CallSite::set_target(call_site, target);
+  java_lang_invoke_CallSite::set_target(call_site(), target());
 }
 JVM_END
 
 JVM_ENTRY(void, MHN_setCallSiteTargetVolatile(JNIEnv* env, jobject igcls, jobject call_site_jh, jobject target_jh)) {
-  oop call_site = JNIHandles::resolve_non_null(call_site_jh);
-  oop target    = JNIHandles::resolve(target_jh);
+  Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh));
+  Handle target   (THREAD, JNIHandles::resolve(target_jh));
   {
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
     Universe::flush_dependents_on(call_site, target);
   }
-  java_lang_invoke_CallSite::set_target_volatile(call_site, target);
+  java_lang_invoke_CallSite::set_target_volatile(call_site(), target());
 }
 JVM_END
 
--- a/src/share/vm/prims/unsafe.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/prims/unsafe.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -302,21 +302,24 @@
 
 UNSAFE_ENTRY(void, Unsafe_SetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h))
   UnsafeWrapper("Unsafe_SetObjectVolatile");
+  {
+    // Catch VolatileCallSite.target stores (via
+    // CallSite.setTargetVolatile) and check call site dependencies.
+    oop p = JNIHandles::resolve(obj);
+    if ((offset == java_lang_invoke_CallSite::target_offset_in_bytes()) && p->is_a(SystemDictionary::CallSite_klass())) {
+      Handle call_site    (THREAD, p);
+      Handle method_handle(THREAD, JNIHandles::resolve(x_h));
+      assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
+      assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
+      {
+        // Walk all nmethods depending on this call site.
+        MutexLocker mu(Compile_lock, thread);
+        Universe::flush_dependents_on(call_site(), method_handle());
+      }
+    }
+  }
   oop x = JNIHandles::resolve(x_h);
   oop p = JNIHandles::resolve(obj);
-  // Catch VolatileCallSite.target stores (via
-  // CallSite.setTargetVolatile) and check call site dependencies.
-  if ((offset == java_lang_invoke_CallSite::target_offset_in_bytes()) && p->is_a(SystemDictionary::CallSite_klass())) {
-    oop call_site     = p;
-    oop method_handle = x;
-    assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
-    assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be");
-    {
-      // Walk all nmethods depending on this call site.
-      MutexLocker mu(Compile_lock, thread);
-      Universe::flush_dependents_on(call_site, method_handle);
-    }
-  }
   void* addr = index_oop_from_field_offset_long(p, offset);
   OrderAccess::release();
   if (UseCompressedOops) {
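
A hedged sketch of the Java-level operation this intercepts: setting a VolatileCallSite target ends up as a volatile store to the call site's target field (via Unsafe), which is why the dependency flush above must run before the store:

    // CallSiteDemo.java -- illustrative only.
    import java.lang.invoke.MethodHandle;
    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.MethodType;
    import java.lang.invoke.VolatileCallSite;

    public class CallSiteDemo {
        static int add(int a, int b) { return a + b; }
        static int mul(int a, int b) { return a * b; }

        public static void main(String[] args) throws Throwable {
            MethodType mt = MethodType.methodType(int.class, int.class, int.class);
            MethodHandles.Lookup lookup = MethodHandles.lookup();
            VolatileCallSite site =
                new VolatileCallSite(lookup.findStatic(CallSiteDemo.class, "add", mt));
            MethodHandle invoker = site.dynamicInvoker();
            System.out.println((int) invoker.invokeExact(2, 3)); // 5
            // The volatile target store below is what the VM must guard:
            // nmethods depending on the old target are deoptimized first.
            site.setTarget(lookup.findStatic(CallSiteDemo.class, "mul", mt));
            System.out.println((int) invoker.invokeExact(2, 3)); // 6
        }
    }
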
--- a/src/share/vm/runtime/arguments.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1577,18 +1577,9 @@
     sprintf(buffer, "java.lang.Integer.IntegerCache.high=" INTX_FORMAT, AutoBoxCacheMax);
     add_property(buffer);
   }
-  if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) {
-    FLAG_SET_DEFAULT(DoEscapeAnalysis, true);
-  }
   if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) {
     FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500);
   }
-  if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) {
-    FLAG_SET_DEFAULT(OptimizeStringConcat, true);
-  }
-  if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeFill)) {
-    FLAG_SET_DEFAULT(OptimizeFill, true);
-  }
 #endif
 
   if (AggressiveOpts) {
--- a/src/share/vm/runtime/deoptimization.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/runtime/deoptimization.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -362,8 +362,6 @@
   intptr_t* frame_sizes = NEW_C_HEAP_ARRAY(intptr_t, number_of_frames);
   // +1 because we always have an interpreter return address for the final slot.
   address* frame_pcs = NEW_C_HEAP_ARRAY(address, number_of_frames + 1);
-  int callee_parameters = 0;
-  int callee_locals = 0;
   int popframe_extra_args = 0;
   // Create an interpreter return address for the stub to use as its return
   // address so the skeletal frames are perfectly walkable
@@ -387,14 +385,16 @@
   // handles are used.  If the caller is interpreted, get the real
   // value so that the proper amount of space can be added to its
   // frame.
-  int caller_actual_parameters = callee_parameters;
+  bool caller_was_method_handle = false;
   if (deopt_sender.is_interpreted_frame()) {
     methodHandle method = deopt_sender.interpreter_frame_method();
-    Bytecode_invoke cur = Bytecode_invoke_check(method,
-                                                deopt_sender.interpreter_frame_bci());
-    Symbol* signature = method->constants()->signature_ref_at(cur.index());
-    ArgumentSizeComputer asc(signature);
-    caller_actual_parameters = asc.size() + (cur.has_receiver() ? 1 : 0);
+    Bytecode_invoke cur = Bytecode_invoke_check(method, deopt_sender.interpreter_frame_bci());
+    if (cur.is_method_handle_invoke()) {
+      // Method handle invokes may involve fairly arbitrary chains of
+      // calls so it's impossible to know how much actual space the
+      // caller has for locals.
+      caller_was_method_handle = true;
+    }
   }
 
   //
@@ -411,14 +411,15 @@
   // in the frame_sizes/frame_pcs so the assembly code can do a trivial walk.
   // so things look a little strange in this loop.
   //
+  int callee_parameters = 0;
+  int callee_locals = 0;
   for (int index = 0; index < array->frames(); index++ ) {
     // frame[number_of_frames - 1 ] = on_stack_size(youngest)
     // frame[number_of_frames - 2 ] = on_stack_size(sender(youngest))
     // frame[number_of_frames - 3 ] = on_stack_size(sender(sender(youngest)))
     int caller_parms = callee_parameters;
-    if (index == array->frames() - 1) {
-      // Use the value from the interpreted caller
-      caller_parms = caller_actual_parameters;
+    if ((index == array->frames() - 1) && caller_was_method_handle) {
+      caller_parms = 0;
     }
     frame_sizes[number_of_frames - 1 - index] = BytesPerWord * array->element(index)->on_stack_size(caller_parms,
                                                                                                     callee_parameters,
@@ -460,13 +461,13 @@
   // QQQ I'd rather see this pushed down into last_frame_adjust
   // and have it take the sender (aka caller).
 
-  if (deopt_sender.is_compiled_frame()) {
+  if (deopt_sender.is_compiled_frame() || caller_was_method_handle) {
     caller_adjustment = last_frame_adjust(0, callee_locals);
-  } else if (callee_locals > caller_actual_parameters) {
+  } else if (callee_locals > callee_parameters) {
     // The caller frame may need extending to accommodate
     // non-parameter locals of the first unpacked interpreted frame.
     // Compute that adjustment.
-    caller_adjustment = last_frame_adjust(caller_actual_parameters, callee_locals);
+    caller_adjustment = last_frame_adjust(callee_parameters, callee_locals);
   }
 
   // If the sender is deoptimized, then we must retrieve the address of the handler
@@ -481,7 +482,7 @@
 
   UnrollBlock* info = new UnrollBlock(array->frame_size() * BytesPerWord,
                                       caller_adjustment * BytesPerWord,
-                                      caller_actual_parameters,
+                                      caller_was_method_handle ? 0 : callee_parameters,
                                       number_of_frames,
                                       frame_sizes,
                                       frame_pcs,
--- a/src/share/vm/runtime/frame.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/runtime/frame.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1338,7 +1338,11 @@
     // Label values common to most frames
     values.describe(-1, unextended_sp(), err_msg("unextended_sp for #%d", frame_no));
     values.describe(-1, sp(), err_msg("sp for #%d", frame_no));
-    values.describe(-1, fp(), err_msg("fp for #%d", frame_no));
+    if (is_compiled_frame()) {
+      values.describe(-1, sp() + _cb->frame_size(), err_msg("computed fp for #%d", frame_no));
+    } else {
+      values.describe(-1, fp(), err_msg("fp for #%d", frame_no));
+    }
   }
   if (is_interpreted_frame()) {
     methodOop m = interpreter_frame_method();
@@ -1450,9 +1454,8 @@
 }
 
 
-void FrameValues::print() {
+void FrameValues::print(JavaThread* thread) {
   _values.sort(compare);
-  JavaThread* thread = JavaThread::current();
 
   // Sometimes values like the fp can be invalid values if the
   // register map wasn't updated during the walk.  Trim out values
@@ -1460,12 +1463,22 @@
   int min_index = 0;
   int max_index = _values.length() - 1;
   intptr_t* v0 = _values.at(min_index).location;
-  while (!thread->is_in_stack((address)v0)) {
-    v0 = _values.at(++min_index).location;
-  }
   intptr_t* v1 = _values.at(max_index).location;
-  while (!thread->is_in_stack((address)v1)) {
-    v1 = _values.at(--max_index).location;
+
+  if (thread == Thread::current()) {
+    while (!thread->is_in_stack((address)v0)) {
+      v0 = _values.at(++min_index).location;
+    }
+    while (!thread->is_in_stack((address)v1)) {
+      v1 = _values.at(--max_index).location;
+    }
+  } else {
+    while (!thread->on_local_stack((address)v0)) {
+      v0 = _values.at(++min_index).location;
+    }
+    while (!thread->on_local_stack((address)v1)) {
+      v1 = _values.at(--max_index).location;
+    }
   }
   intptr_t* min = MIN2(v0, v1);
   intptr_t* max = MAX2(v0, v1);
--- a/src/share/vm/runtime/frame.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/runtime/frame.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -516,7 +516,7 @@
   void describe(int owner, intptr_t* location, const char* description, int priority = 0);
 
   void validate();
-  void print();
+  void print(JavaThread* thread);
 };
 
 #endif
--- a/src/share/vm/runtime/globals.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/runtime/globals.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -577,8 +577,8 @@
   develop(bool, VerifyStack, false,                                         \
           "Verify stack of each thread when it is entering a runtime call") \
                                                                             \
-  develop(bool, ForceUnreachable, false,                                    \
-          "(amd64) Make all non code cache addresses to be unreachable with rip-rel forcing use of 64bit literal fixups") \
+  diagnostic(bool, ForceUnreachable, false,                                 \
+          "Make all non code cache addresses to be unreachable with forcing use of 64bit literal fixups") \
                                                                             \
   notproduct(bool, StressDerivedPointers, false,                            \
           "Force scavenge when a derived pointers is detected on stack "    \
@@ -904,7 +904,7 @@
   product(bool, AlwaysRestoreFPU, false,                                    \
           "Restore the FPU control word after every JNI call (expensive)")  \
                                                                             \
-  notproduct(bool, PrintCompilation2, false,                                \
+  diagnostic(bool, PrintCompilation2, false,                                \
           "Print additional statistics per compilation")                    \
                                                                             \
   diagnostic(bool, PrintAdapterHandlers, false,                             \
@@ -2580,7 +2580,7 @@
   diagnostic(bool, DebugInlinedCalls, true,                                 \
          "If false, restricts profiled locations to the root method only")  \
                                                                             \
-  product(bool, PrintVMOptions, NOT_EMBEDDED(trueInDebug) EMBEDDED_ONLY(false),\
+  product(bool, PrintVMOptions, false,                                      \
          "Print flags that appeared on the command line")                   \
                                                                             \
   product(bool, IgnoreUnrecognizedVMOptions, false,                         \
@@ -3364,7 +3364,7 @@
   notproduct(bool, ExitOnFullCodeCache, false,                              \
           "Exit the VM if we fill the code cache.")                         \
                                                                             \
-  product(bool, UseCodeCacheFlushing, false,                                \
+  product(bool, UseCodeCacheFlushing, true,                                 \
           "Attempt to clean the code cache before shutting off compiler")   \
                                                                             \
   product(intx,  MinCodeCacheFlushingInterval, 30,                          \
--- a/src/share/vm/runtime/interfaceSupport.hpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/runtime/interfaceSupport.hpp	Fri Nov 18 15:15:13 2011 -0800
@@ -72,9 +72,9 @@
   }
 };
 
-// InterfaceSupport provides functionality used by the __LEAF and __ENTRY
-// macros. These macros are used to guard entry points into the VM and
-// perform checks upon leave of the VM.
+// InterfaceSupport provides functionality used by the VM_LEAF_BASE and
+// VM_ENTRY_BASE macros. These macros are used to guard entry points into
+// the VM and perform checks upon leave of the VM.
 
 
 class InterfaceSupport: AllStatic {
@@ -433,7 +433,7 @@
 
 // LEAF routines do not lock, GC or throw exceptions
 
-#define __LEAF(result_type, header)                                  \
+#define VM_LEAF_BASE(result_type, header)                            \
   TRACE_CALL(result_type, header)                                    \
   debug_only(NoHandleMark __hm;)                                     \
   /* begin of body */
@@ -441,7 +441,7 @@
 
 // ENTRY routines may lock, GC and throw exceptions
 
-#define __ENTRY(result_type, header, thread)                         \
+#define VM_ENTRY_BASE(result_type, header, thread)                   \
   TRACE_CALL(result_type, header)                                    \
   HandleMarkCleaner __hm(thread);                                    \
   Thread* THREAD = thread;                                           \
@@ -450,7 +450,7 @@
 
 // QUICK_ENTRY routines behave like ENTRY but without a handle mark
 
-#define __QUICK_ENTRY(result_type, header, thread)                   \
+#define VM_QUICK_ENTRY_BASE(result_type, header, thread)             \
   TRACE_CALL(result_type, header)                                    \
   debug_only(NoHandleMark __hm;)                                     \
   Thread* THREAD = thread;                                           \
@@ -463,20 +463,20 @@
 #define IRT_ENTRY(result_type, header)                               \
   result_type header {                                               \
     ThreadInVMfromJava __tiv(thread);                                \
-    __ENTRY(result_type, header, thread)                             \
+    VM_ENTRY_BASE(result_type, header, thread)                       \
     debug_only(VMEntryWrapper __vew;)
 
 
 #define IRT_LEAF(result_type, header)                                \
   result_type header {                                               \
-    __LEAF(result_type, header)                                      \
+    VM_LEAF_BASE(result_type, header)                                \
     debug_only(No_Safepoint_Verifier __nspv(true);)
 
 
 #define IRT_ENTRY_NO_ASYNC(result_type, header)                      \
   result_type header {                                               \
     ThreadInVMfromJavaNoAsyncException __tiv(thread);                \
-    __ENTRY(result_type, header, thread)                             \
+    VM_ENTRY_BASE(result_type, header, thread)                       \
     debug_only(VMEntryWrapper __vew;)
 
 // Another special case for nmethod_entry_point so the nmethod that the
@@ -487,7 +487,7 @@
   result_type header {                                               \
     nmethodLocker _nmlock(nm);                                       \
     ThreadInVMfromJavaNoAsyncException __tiv(thread);                                \
-    __ENTRY(result_type, header, thread)
+    VM_ENTRY_BASE(result_type, header, thread)
 
 #define IRT_END }
 
@@ -497,20 +497,20 @@
 #define JRT_ENTRY(result_type, header)                               \
   result_type header {                                               \
     ThreadInVMfromJava __tiv(thread);                                \
-    __ENTRY(result_type, header, thread)                             \
+    VM_ENTRY_BASE(result_type, header, thread)                       \
     debug_only(VMEntryWrapper __vew;)
 
 
 #define JRT_LEAF(result_type, header)                                \
   result_type header {                                               \
-  __LEAF(result_type, header)                                        \
+  VM_LEAF_BASE(result_type, header)                                  \
   debug_only(JRT_Leaf_Verifier __jlv;)
 
 
 #define JRT_ENTRY_NO_ASYNC(result_type, header)                      \
   result_type header {                                               \
     ThreadInVMfromJavaNoAsyncException __tiv(thread);                \
-    __ENTRY(result_type, header, thread)                             \
+    VM_ENTRY_BASE(result_type, header, thread)                       \
     debug_only(VMEntryWrapper __vew;)
 
 // Same as JRT Entry but allows for return value after the safepoint
@@ -543,11 +543,11 @@
     assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
     ThreadInVMfromNative __tiv(thread);                              \
     debug_only(VMNativeEntryWrapper __vew;)                          \
-    __ENTRY(result_type, header, thread)
+    VM_ENTRY_BASE(result_type, header, thread)
 
 
 // Ensure that the VMNativeEntryWrapper constructor, which can cause
-// a GC, is called outside the NoHandleMark (set via __QUICK_ENTRY).
+// a GC, is called outside the NoHandleMark (set via VM_QUICK_ENTRY_BASE).
 #define JNI_QUICK_ENTRY(result_type, header)                         \
 extern "C" {                                                         \
   result_type JNICALL header {                                \
@@ -555,7 +555,7 @@
     assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
     ThreadInVMfromNative __tiv(thread);                              \
     debug_only(VMNativeEntryWrapper __vew;)                          \
-    __QUICK_ENTRY(result_type, header, thread)
+    VM_QUICK_ENTRY_BASE(result_type, header, thread)
 
 
 #define JNI_LEAF(result_type, header)                                \
@@ -563,7 +563,7 @@
   result_type JNICALL header {                                \
     JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
     assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
-    __LEAF(result_type, header)
+    VM_LEAF_BASE(result_type, header)
 
 
 // Close the routine and the extern "C"
@@ -579,7 +579,7 @@
     JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
     ThreadInVMfromNative __tiv(thread);                              \
     debug_only(VMNativeEntryWrapper __vew;)                          \
-    __ENTRY(result_type, header, thread)
+    VM_ENTRY_BASE(result_type, header, thread)
 
 
 #define JVM_ENTRY_NO_ENV(result_type, header)                        \
@@ -588,7 +588,7 @@
     JavaThread* thread = (JavaThread*)ThreadLocalStorage::thread();  \
     ThreadInVMfromNative __tiv(thread);                              \
     debug_only(VMNativeEntryWrapper __vew;)                          \
-    __ENTRY(result_type, header, thread)
+    VM_ENTRY_BASE(result_type, header, thread)
 
 
 #define JVM_QUICK_ENTRY(result_type, header)                         \
@@ -597,14 +597,14 @@
     JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
     ThreadInVMfromNative __tiv(thread);                              \
     debug_only(VMNativeEntryWrapper __vew;)                          \
-    __QUICK_ENTRY(result_type, header, thread)
+    VM_QUICK_ENTRY_BASE(result_type, header, thread)
 
 
 #define JVM_LEAF(result_type, header)                                \
 extern "C" {                                                         \
   result_type JNICALL header {                                       \
     VM_Exit::block_if_vm_exited();                                   \
-    __LEAF(result_type, header)
+    VM_LEAF_BASE(result_type, header)
 
 
 #define JVM_END } }
--- a/src/share/vm/runtime/sharedRuntime.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -1672,9 +1672,12 @@
   nmethod* nm = cb->as_nmethod_or_null();
   assert(nm, "must be");
 
-  // Don't fixup MethodHandle call sites as c2i/i2c adapters are used
-  // to implement MethodHandle actions.
-  if (nm->is_method_handle_return(caller_pc)) {
+  // Get the return PC for the passed caller PC.
+  address return_pc = caller_pc + frame::pc_return_offset;
+
+  // Don't fix up method handle call sites, as the executed method
+  // handle adapters do the required MethodHandle chain work.
+  if (nm->is_method_handle_return(return_pc)) {
     return;
   }
 
@@ -1693,8 +1696,8 @@
 
     // Expect to find a native call there (unless it was no-inline cache vtable dispatch)
     MutexLockerEx ml_patch(Patching_lock, Mutex::_no_safepoint_check_flag);
-    if (NativeCall::is_call_before(caller_pc + frame::pc_return_offset)) {
-      NativeCall *call = nativeCall_before(caller_pc + frame::pc_return_offset);
+    if (NativeCall::is_call_before(return_pc)) {
+      NativeCall *call = nativeCall_before(return_pc);
       //
       // bug 6281185. We might get here after resolving a call site to a vanilla
       // virtual call. Because the resolvee uses the verified entry it may then
@@ -1744,7 +1747,6 @@
       }
     }
   }
-
 IRT_END
 
 
--- a/src/share/vm/runtime/thread.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/runtime/thread.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -2947,7 +2947,7 @@
     values.validate();
   } else {
     tty->print_cr("[Describe stack layout]");
-    values.print();
+    values.print(this);
   }
 }
 #endif
--- a/src/share/vm/services/heapDumper.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/services/heapDumper.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -27,6 +27,7 @@
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
 #include "gc_implementation/shared/vmGCOperations.hpp"
+#include "memory/gcLocker.inline.hpp"
 #include "memory/genCollectedHeap.hpp"
 #include "memory/universe.hpp"
 #include "oops/objArrayKlass.hpp"
@@ -1709,11 +1710,16 @@
 
   HandleMark hm;
   CollectedHeap* ch = Universe::heap();
+
+  ch->ensure_parsability(false); // must happen, even if collection does
+                                 // not happen (e.g. due to GC_locker)
+
   if (_gc_before_heap_dump) {
-    ch->collect_as_vm_thread(GCCause::_heap_dump);
-  } else {
-    // make the heap parsable (no need to retire TLABs)
-    ch->ensure_parsability(false);
+    if (GC_locker::is_active()) {
+      warning("GC locker is held; pre-heapdump GC was skipped");
+    } else {
+      ch->collect_as_vm_thread(GCCause::_heap_dump);
+    }
   }
 
   // At this point we should be the only dumper active, so
--- a/src/share/vm/utilities/quickSort.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/utilities/quickSort.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -23,13 +23,13 @@
  */
 
 #include "precompiled.hpp"
-#include "utilities/quickSort.hpp"
+
+/////////////// Unit tests ///////////////
 
 #ifndef PRODUCT
 
-// Unit tests
-
 #include "runtime/os.hpp"
+#include "utilities/quickSort.hpp"
 #include <stdlib.h>
 
 static int test_comparator(int a, int b) {
@@ -94,7 +94,7 @@
 }
 
 bool QuickSort::test_quick_sort() {
-  tty->print_cr("test_quick_sort\n");
+  tty->print_cr("test_quick_sort");
   {
     int* test_array = NULL;
     int* expected_array = NULL;
--- a/src/share/vm/utilities/vmError.cpp	Thu Nov 17 10:45:53 2011 -0800
+++ b/src/share/vm/utilities/vmError.cpp	Fri Nov 18 15:15:13 2011 -0800
@@ -680,8 +680,10 @@
   STEP(190, "(printing heap information)" )
 
      if (_verbose && Universe::is_fully_initialized()) {
-       // print heap information before vm abort
-       Universe::print_on(st);
+       // Print heap information before vm abort. As we'd like as much
+       // information as possible in the report we ask for the
+       // extended (i.e., more detailed) version.
+       Universe::print_on(st, true /* extended */);
        st->cr();
      }
 
--- a/test/Makefile	Thu Nov 17 10:45:53 2011 -0800
+++ b/test/Makefile	Fri Nov 18 15:15:13 2011 -0800
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1995, 2011, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -219,6 +219,15 @@
 
 ################################################################
 
+# internalvmtests (run internal unit tests inside the VM)
+
+internalvmtests: prep $(PRODUCT_HOME)
+	$(PRODUCT_HOME)/bin/java $(JAVA_OPTIONS) -XX:+ExecuteInternalVMTests -version
+
+PHONY_LIST += internalvmtests
+
+################################################################
+
 # packtest
 
 # Expect JPRT to set JPRT_PACKTEST_HOME.
--- a/test/compiler/6865265/StackOverflowBug.java	Thu Nov 17 10:45:53 2011 -0800
+++ b/test/compiler/6865265/StackOverflowBug.java	Fri Nov 18 15:15:13 2011 -0800
@@ -28,7 +28,7 @@
  * @summary JVM crashes with "missing exception handler" error
  * @author volker.simonis@sap.com
  *
- * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss128k StackOverflowBug
+ * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss224k StackOverflowBug
  */
 
 
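The only change here is the stack size given to the test thread: -Xss is bumped from 128k to 224k. Presumably 128k is no longer accepted, or no longer survivable during VM startup, on all platforms; 224k still keeps the stack small enough that the StackOverflowError the test depends on triggers quickly.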
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7103261/Test7103261.java	Fri Nov 18 15:15:13 2011 -0800
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7103261
+ * @summary crash with jittester on sparc
+ *
+ * @run main Test7103261
+ */
+
+// exercise implicit null checking in the compiler for various field types
+public class Test7103261 {
+    static Test7103261 null_value;
+    static Test7103261 nonnull_value = new Test7103261();
+    static Test7103261 nonnull_value2 = new Test7103261();
+
+    long l;
+    int i;
+    float f;
+    double d;
+    byte b;
+    char c;
+    short s;
+    boolean z;
+
+    public static void main(String[] args) {
+        constantStore();
+        valueTest(false);
+        valueTest(true);
+    }
+    static void constantStore() {
+        for (int field = 0; field < 8; field++) {
+            try {
+                Test7103261 o = nonnull_value;
+                for (int i = 0; i < 100000; i++) {
+                    switch (field) {
+                    case 0: o.l = 0; break;
+                    case 1: o.i = 0; break;
+                    case 2: o.f = 0; break;
+                    case 3: o.d = 0; break;
+                    case 4: o.b = 0; break;
+                    case 5: o.c = 0; break;
+                    case 6: o.s = 0; break;
+                    case 7: o.z = false; break;
+                    default: throw new InternalError();
+                    }
+                    if (i == 90000) {
+                        // hide nullness from optimizer
+                        o = null_value;
+                    }
+                }
+            } catch (NullPointerException npe) { // expected once o is null
+            }
+        }
+    }
+    static void valueTest(boolean store) {
+        for (int field = 0; field < 8; field++) {
+            try {
+                Test7103261 o  = nonnull_value;
+                Test7103261 o2 = nonnull_value2;
+                for (int i = 0; i < 100000; i++) {
+                    switch (field) {
+                    case 0: o.l = o2.l; break;
+                    case 1: o.i = o2.i; break;
+                    case 2: o.f = o2.f; break;
+                    case 3: o.d = o2.d; break;
+                    case 4: o.b = o2.b; break;
+                    case 5: o.c = o2.c; break;
+                    case 6: o.s = o2.s; break;
+                    case 7: o.z = o2.z; break;
+                    default: throw new InternalError();
+                    }
+                    if (i == 90000) {
+                        // hide nullness from optimizer
+                        if (store)
+                            o = null_value;
+                        else
+                            o2 = null_value;
+                    }
+                }
+            } catch (NullPointerException npe) { // expected once o or o2 is null
+            }
+        }
+    }
+}
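The shape of the new test follows from the bug summary: each inner loop is warmed up for 90000 iterations with non-null objects so the compiler compiles the field access with an implicit (trap-based) null check, then the reference is swapped to null; constantStore nulls the store-side object o, while valueTest nulls either the store side (o) or the load side (o2) depending on its argument. The empty catch blocks swallow the NullPointerException a correct VM must raise at that point instead of crashing, and iterating field over 0..7 covers all eight primitive field types.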