# HG changeset patch # User coleenp # Date 1264201613 28800 # Node ID 2718ec34c69914ad7649938707e73f5aaec18f60 # Parent 3908ad1248385c1db42bb10e95197769fbf76352 # Parent 3d6016e040d634fea74ef24b51d9503bc8a2368f Merge diff -r 3908ad124838 -r 2718ec34c699 .hgtags --- a/.hgtags Wed Jan 20 11:32:41 2010 -0700 +++ b/.hgtags Fri Jan 22 15:06:53 2010 -0800 @@ -53,3 +53,4 @@ 9174bb32e934965288121f75394874eeb1fcb649 jdk7-b76 455105fc81d941482f8f8056afaa7aa0949c9300 jdk7-b77 e703499b4b51e3af756ae77c3d5e8b3058a14e4e jdk7-b78 +a5a6adfca6ecefb5894a848debabfe442ff50e25 jdk7-b79 diff -r 3908ad124838 -r 2718ec34c699 make/hotspot_version --- a/make/hotspot_version Wed Jan 20 11:32:41 2010 -0700 +++ b/make/hotspot_version Fri Jan 22 15:06:53 2010 -0800 @@ -35,7 +35,7 @@ HS_MAJOR_VER=17 HS_MINOR_VER=0 -HS_BUILD_NUMBER=06 +HS_BUILD_NUMBER=07 JDK_MAJOR_VER=1 JDK_MINOR_VER=7 diff -r 3908ad124838 -r 2718ec34c699 src/cpu/sparc/vm/interpreter_sparc.cpp --- a/src/cpu/sparc/vm/interpreter_sparc.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/sparc/vm/interpreter_sparc.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -394,6 +394,11 @@ } +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + // No special entry points that preclude compilation + return true; +} + // This method tells the deoptimizer how big an interpreted frame must be: int AbstractInterpreter::size_activation(methodOop method, int tempcount, diff -r 3908ad124838 -r 2718ec34c699 src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2862,6 +2862,9 @@ // arraycopy stubs used by compilers generate_arraycopy_stubs(); + + // Don't initialize the platform math functions since sparc + // doesn't have intrinsics for these operations. } diff -r 3908ad124838 -r 2718ec34c699 src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1999-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
 * * This code is free software; you can redistribute it and/or modify it @@ -2030,6 +2030,54 @@ entry_checkcast_arraycopy); } + void generate_math_stubs() { + { + StubCodeMark mark(this, "StubRoutines", "log"); + StubRoutines::_intrinsic_log = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ flog(); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "log10"); + StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ flog10(); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "sin"); + StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ trigfunc('s'); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "cos"); + StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ trigfunc('c'); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "tan"); + StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ trigfunc('t'); + __ ret(0); + } + + // The intrinsic version of these seems to return the same value as + // the strict version. + StubRoutines::_intrinsic_exp = SharedRuntime::dexp; + StubRoutines::_intrinsic_pow = SharedRuntime::dpow; + } + public: // Information about frame layout at time of blocking runtime call. // Note that we only have to preserve callee-saved registers since @@ -2228,6 +2276,8 @@ MethodHandles::generate_method_handle_stub(_masm, ek); } } + + generate_math_stubs(); } diff -r 3908ad124838 -r 2718ec34c699 src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 2003-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2003-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
 * * This code is free software; you can redistribute it and/or modify it @@ -2731,6 +2731,79 @@ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; } + void generate_math_stubs() { + { + StubCodeMark mark(this, "StubRoutines", "log"); + StubRoutines::_intrinsic_log = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ flog(); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "log10"); + StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ flog10(); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "sin"); + StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ trigfunc('s'); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "cos"); + StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ trigfunc('c'); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "tan"); + StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ trigfunc('t'); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + + // The intrinsic version of these seems to return the same value as + // the strict version. + StubRoutines::_intrinsic_exp = SharedRuntime::dexp; + StubRoutines::_intrinsic_pow = SharedRuntime::dpow; + } + #undef __ #define __ masm-> @@ -2945,6 +3018,8 @@ MethodHandles::generate_method_handle_stub(_masm, ek); } } + + generate_math_stubs(); } public: diff -r 3908ad124838 -r 2718ec34c699 src/cpu/x86/vm/templateInterpreter_x86_32.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1431,6 +1431,23 @@ } +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : + return false; + default: + return true; + } +} + // How much stack a method activation needs in words. 
int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { diff -r 3908ad124838 -r 2718ec34c699 src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 2003-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2003-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1456,6 +1456,23 @@ generate_normal_entry(synchronized); } +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : + return false; + default: + return true; + } +} + // How much stack a method activation needs in words. int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { const int entry_size = frame::interpreter_frame_monitor_size(); diff -r 3908ad124838 -r 2718ec34c699 src/os_cpu/linux_zero/vm/os_linux_zero.cpp --- a/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,6 +1,6 @@ /* * Copyright 2003-2007 Sun Microsystems, Inc. All Rights Reserved. - * Copyright 2007, 2008 Red Hat, Inc. + * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -239,7 +239,21 @@ } bool os::is_allocatable(size_t bytes) { - ShouldNotCallThis(); +#ifdef _LP64 + return true; +#else + if (bytes < 2 * G) { + return true; + } + + char* addr = reserve_memory(bytes, NULL); + + if (addr != NULL) { + release_memory(addr, bytes); + } + + return addr != NULL; +#endif // _LP64 } /////////////////////////////////////////////////////////////////////////////// diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/c1/c1_LIR.hpp --- a/src/share/vm/c1/c1_LIR.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/c1/c1_LIR.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 2000-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2000-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
 * * This code is free software; you can redistribute it and/or modify it @@ -2000,7 +2000,7 @@ typedef enum { inputMode, firstMode = inputMode, tempMode, outputMode, numModes, invalidMode = -1 } OprMode; enum { - maxNumberOfOperands = 14, + maxNumberOfOperands = 16, maxNumberOfInfos = 4 }; diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/ci/ciField.cpp --- a/src/share/vm/ci/ciField.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/ci/ciField.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -161,6 +161,18 @@ "bootstrap classes must not create & cache unshared fields"); } +static bool trust_final_non_static_fields(ciInstanceKlass* holder) { + if (holder == NULL) + return false; + if (holder->name() == ciSymbol::java_lang_System()) + // Never trust strangely unstable finals: System.out, etc. + return false; + // Even if general trusting is disabled, trust system-built closures in these packages. + if (holder->is_in_package("java/dyn") || holder->is_in_package("sun/dyn")) + return true; + return TrustFinalNonStaticFields; +} + void ciField::initialize_from(fieldDescriptor* fd) { // Get the flags, offset, and canonical holder of the field. _flags = ciFlags(fd->access_flags()); @@ -172,7 +184,7 @@ if (!this->is_static()) { // A field can be constant if it's a final static field or if it's // a final non-static field of a trusted class ({java,sun}.dyn). - if (_holder->is_in_package("java/dyn") || _holder->is_in_package("sun/dyn")) { + if (trust_final_non_static_fields(_holder)) { _is_constant = true; return; } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1441,6 +1441,7 @@ } jint G1CollectedHeap::initialize() { + CollectedHeap::pre_initialize(); os::enable_vtime(); // Necessary to satisfy locking discipline assertions. diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -1007,6 +1007,10 @@ return true; } + virtual bool card_mark_must_follow_store() const { + return true; + } + bool is_in_young(oop obj) { HeapRegion* hr = heap_region_containing(obj); return hr != NULL && hr->is_young(); diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/g1/ptrQueue.cpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -73,7 +73,12 @@ void PtrQueue::locking_enqueue_completed_buffer(void** buf) { assert(_lock->owned_by_self(), "Required."); + + // We have to unlock _lock (which may be Shared_DirtyCardQ_lock) before + // we acquire DirtyCardQ_CBL_mon inside enqueue_complete_buffer as they + // have the same rank and we may get the "possible deadlock" message _lock->unlock(); + qset()->enqueue_complete_buffer(buf); // We must relock only because the caller will unlock, for the normal // case. @@ -140,7 +145,36 @@ // holding the lock if there is one). if (_buf != NULL) { if (_lock) { - locking_enqueue_completed_buffer(_buf); + assert(_lock->owned_by_self(), "Required."); + + // The current PtrQ may be the shared dirty card queue and + // may be being manipulated by more than one worker thread + // during a pause. 
Since the enqueuing of the completed + // buffer unlocks the Shared_DirtyCardQ_lock, more than one + // worker thread can 'race' on reading the shared queue attributes + // (_buf and _index) and multiple threads can call into this + // routine for the same buffer. This will cause the completed + // buffer to be added to the CBL multiple times. + + // We "claim" the current buffer by caching the value of _buf in + // a local and clearing the field while holding _lock. When + // _lock is released (while enqueueing the completed buffer) + // the thread that acquires _lock will skip this code, + // preventing a subsequent multiple enqueue, and + // install a newly allocated buffer below. + + void** buf = _buf; // local pointer to completed buffer + _buf = NULL; // clear shared _buf field + + locking_enqueue_completed_buffer(buf); // enqueue completed buffer + + // While the current thread was enqueuing the buffer, another thread + // may have allocated a new buffer and inserted it into this pointer + // queue. If that happens then we just return so that the current + // thread doesn't overwrite the buffer allocated by the other thread + // and potentially lose some dirtied cards. + + if (_buf != NULL) return; } else { if (qset()->process_or_enqueue_complete_buffer(_buf)) { // Recycle the buffer. No allocation. diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp Wed Jan 20 11:32:41 2010 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -/* - * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -void PtrQueue::handle_zero_index() { - assert(0 == _index, "Precondition."); - // This thread records the full buffer and allocates a new one (while - // holding the lock if there is one). 
- void** buf = _buf; - _buf = qset()->allocate_buffer(); - _sz = qset()->buffer_size(); - _index = _sz; - assert(0 <= _index && _index <= _sz, "Invariant."); - if (buf != NULL) { - if (_lock) { - locking_enqueue_completed_buffer(buf); - } else { - qset()->enqueue_complete_buffer(buf); - } - } -} diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -51,6 +51,8 @@ } jint ParallelScavengeHeap::initialize() { + CollectedHeap::pre_initialize(); + // Cannot be initialized until after the flags are parsed GenerationSizer flag_parser; @@ -717,10 +719,6 @@ return young_gen()->allocate(size, true); } -void ParallelScavengeHeap::fill_all_tlabs(bool retire) { - CollectedHeap::fill_all_tlabs(retire); -} - void ParallelScavengeHeap::accumulate_statistics_all_tlabs() { CollectedHeap::accumulate_statistics_all_tlabs(); } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -54,7 +54,6 @@ protected: static inline size_t total_invocations(); HeapWord* allocate_new_tlab(size_t size); - void fill_all_tlabs(bool retire); public: ParallelScavengeHeap() : CollectedHeap() { @@ -191,6 +190,10 @@ return true; } + virtual bool card_mark_must_follow_store() const { + return false; + } + // Return true if we don't we need a store barrier for // initializing stores to an object at this address. virtual bool can_elide_initializing_store_barrier(oop new_obj); diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -59,8 +59,18 @@ PerfDataManager::create_string_variable(SUN_GC, "lastCause", 80, GCCause::to_string(_gc_lastcause), CHECK); } + _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below. } +void CollectedHeap::pre_initialize() { + // Used for ReduceInitialCardMarks (when COMPILER2 is used); + // otherwise remains unused. 
+#ifdef COMPILER2 + _defer_initial_card_mark = ReduceInitialCardMarks && (DeferInitialCardMark || card_mark_must_follow_store()); +#else + assert(_defer_initial_card_mark == false, "Who would set it?"); +#endif +} #ifndef PRODUCT void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) { @@ -140,12 +150,13 @@ void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) { MemRegion deferred = thread->deferred_card_mark(); if (!deferred.is_empty()) { + assert(_defer_initial_card_mark, "Otherwise should be empty"); { // Verify that the storage points to a parsable object in heap DEBUG_ONLY(oop old_obj = oop(deferred.start());) assert(is_in(old_obj), "Not in allocated heap"); assert(!can_elide_initializing_store_barrier(old_obj), - "Else should have been filtered in defer_store_barrier()"); + "Else should have been filtered in new_store_pre_barrier()"); assert(!is_in_permanent(old_obj), "Sanity: not expected"); assert(old_obj->is_oop(true), "Not an oop"); assert(old_obj->is_parsable(), "Will not be concurrently parsable"); @@ -174,9 +185,7 @@ // so long as the card-mark is completed before the next // scavenge. For all these cases, we can do a card mark // at the point at which we do a slow path allocation -// in the old gen. For uniformity, however, we end -// up using the same scheme (see below) for all three -// cases (deferring the card-mark appropriately). +// in the old gen, i.e. in this call. // (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires // in addition that the card-mark for an old gen allocated // object strictly follow any associated initializing stores. @@ -199,12 +208,13 @@ // but, like in CMS, because of the presence of concurrent refinement // (much like CMS' precleaning), must strictly follow the oop-store. // Thus, using the same protocol for maintaining the intended -// invariants turns out, serendepitously, to be the same for all -// three collectors/heap types above. +// invariants turns out, serendipitously, to be the same for both +// G1 and CMS. // -// For each future collector, this should be reexamined with -// that specific collector in mind. -oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) { +// For any future collector, this code should be reexamined with +// that specific collector in mind, and the documentation above suitably +// extended and updated. +oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) { // If a previous card-mark was deferred, flush it now. flush_deferred_store_barrier(thread); if (can_elide_initializing_store_barrier(new_obj)) { // The deferred_card_mark region should be empty // following the flush above. 
assert(thread->deferred_card_mark().is_empty(), "Error"); } else { - // Remember info for the newly deferred store barrier - MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size()); - assert(!deferred.is_empty(), "Error"); - thread->set_deferred_card_mark(deferred); + MemRegion mr((HeapWord*)new_obj, new_obj->size()); + assert(!mr.is_empty(), "Error"); + if (_defer_initial_card_mark) { + // Defer the card mark + thread->set_deferred_card_mark(mr); + } else { + // Do the card mark + BarrierSet* bs = barrier_set(); + assert(bs->has_write_region_opt(), "No write_region() on BarrierSet"); + bs->write_region(mr); + } } return new_obj; } @@ -241,9 +258,9 @@ assert(Universe::heap()->is_in_reserved(start + words - 1), "not in heap"); } -void CollectedHeap::zap_filler_array(HeapWord* start, size_t words) +void CollectedHeap::zap_filler_array(HeapWord* start, size_t words, bool zap) { - if (ZapFillerObjects) { + if (ZapFillerObjects && zap) { Copy::fill_to_words(start + filler_array_hdr_size(), words - filler_array_hdr_size(), 0XDEAFBABE); } @@ -251,7 +268,7 @@ #endif // ASSERT void -CollectedHeap::fill_with_array(HeapWord* start, size_t words) +CollectedHeap::fill_with_array(HeapWord* start, size_t words, bool zap) { assert(words >= filler_array_min_size(), "too small for an array"); assert(words <= filler_array_max_size(), "too big for a single object"); @@ -262,16 +279,16 @@ // Set the length first for concurrent GC. ((arrayOop)start)->set_length((int)len); post_allocation_setup_common(Universe::intArrayKlassObj(), start, words); - DEBUG_ONLY(zap_filler_array(start, words);) + DEBUG_ONLY(zap_filler_array(start, words, zap);) } void -CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words) +CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words, bool zap) { assert(words <= filler_array_max_size(), "too big for a single object"); if (words >= filler_array_min_size()) { - fill_with_array(start, words); + fill_with_array(start, words, zap); } else if (words > 0) { assert(words == min_fill_size(), "unaligned size"); post_allocation_setup_common(SystemDictionary::Object_klass(), start, @@ -279,14 +296,14 @@ } } -void CollectedHeap::fill_with_object(HeapWord* start, size_t words) +void CollectedHeap::fill_with_object(HeapWord* start, size_t words, bool zap) { DEBUG_ONLY(fill_args_check(start, words);) HandleMark hm; // Free handles before leaving. - fill_with_object_impl(start, words); + fill_with_object_impl(start, words, zap); } -void CollectedHeap::fill_with_objects(HeapWord* start, size_t words) +void CollectedHeap::fill_with_objects(HeapWord* start, size_t words, bool zap) { DEBUG_ONLY(fill_args_check(start, words);) HandleMark hm; // Free handles before leaving. @@ -299,13 +316,13 @@ const size_t max = filler_array_max_size(); while (words > max) { const size_t cur = words - max >= min ? max : max - min; - fill_with_array(start, cur); + fill_with_array(start, cur, zap); start += cur; words -= cur; } #endif - fill_with_object_impl(start, words); + fill_with_object_impl(start, words, zap); } HeapWord* CollectedHeap::allocate_new_tlab(size_t size) { @@ -313,22 +330,6 @@ return NULL; } -void CollectedHeap::fill_all_tlabs(bool retire) { - assert(UseTLAB, "should not reach here"); - // See note in ensure_parsability() below. - assert(SafepointSynchronize::is_at_safepoint() || - !is_init_completed(), - "should only fill tlabs at safepoint"); - // The main thread starts allocating via a TLAB even before it - // has added itself to the threads list at vm boot-up. 
- assert(Threads::first() != NULL, - "Attempt to fill tlabs before main thread has been added" - " to threads list is doomed to failure!"); - for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) { - thread->tlab().make_parsable(retire); - } -} - void CollectedHeap::ensure_parsability(bool retire_tlabs) { // The second disjunct in the assertion below makes a concession // for the start-up verification done while the VM is being @@ -343,8 +344,24 @@ "Should only be called at a safepoint or at start-up" " otherwise concurrent mutator activity may make heap " " unparsable again"); - if (UseTLAB) { - fill_all_tlabs(retire_tlabs); + const bool use_tlab = UseTLAB; + const bool deferred = _defer_initial_card_mark; + // The main thread starts allocating via a TLAB even before it + // has added itself to the threads list at vm boot-up. + assert(!use_tlab || Threads::first() != NULL, + "Attempt to fill tlabs before main thread has been added" + " to threads list is doomed to failure!"); + for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) { + if (use_tlab) thread->tlab().make_parsable(retire_tlabs); +#ifdef COMPILER2 + // The deferred store barriers must all have been flushed to the + // card-table (or other remembered set structure) before GC starts + // processing the card-table (or other remembered set). + if (deferred) flush_deferred_store_barrier(thread); +#else + assert(!deferred, "Should be false"); + assert(thread->deferred_card_mark().is_empty(), "Should be empty"); +#endif } } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -51,6 +51,9 @@ // Used for filler objects (static, but initialized in ctor). static size_t _filler_array_max_size; + // Used in support of ReduceInitialCardMarks; only consulted if COMPILER2 is being used + bool _defer_initial_card_mark; + protected: MemRegion _reserved; BarrierSet* _barrier_set; @@ -70,13 +73,16 @@ // Constructor CollectedHeap(); + // Do common initializations that must follow instance construction, + // for example, those needing virtual calls. + // This code could perhaps be moved into initialize() but would + // be slightly more awkward because we want the latter to be a + // pure virtual. + void pre_initialize(); + // Create a new tlab virtual HeapWord* allocate_new_tlab(size_t size); - // Fix up tlabs to make the heap well-formed again, - // optionally retiring the tlabs. - virtual void fill_all_tlabs(bool retire); - // Accumulate statistics on all tlabs. virtual void accumulate_statistics_all_tlabs(); @@ -127,14 +133,14 @@ static inline size_t filler_array_max_size(); DEBUG_ONLY(static void fill_args_check(HeapWord* start, size_t words);) - DEBUG_ONLY(static void zap_filler_array(HeapWord* start, size_t words);) + DEBUG_ONLY(static void zap_filler_array(HeapWord* start, size_t words, bool zap = true);) // Fill with a single array; caller must ensure filler_array_min_size() <= // words <= filler_array_max_size(). - static inline void fill_with_array(HeapWord* start, size_t words); + static inline void fill_with_array(HeapWord* start, size_t words, bool zap = true); // Fill with a single object (either an int array or a java.lang.Object). 
- static inline void fill_with_object_impl(HeapWord* start, size_t words); + static inline void fill_with_object_impl(HeapWord* start, size_t words, bool zap = true); // Verification functions virtual void check_for_bad_heap_word_value(HeapWord* addr, size_t size) @@ -338,14 +344,14 @@ return size_t(align_object_size(oopDesc::header_size())); } - static void fill_with_objects(HeapWord* start, size_t words); + static void fill_with_objects(HeapWord* start, size_t words, bool zap = true); - static void fill_with_object(HeapWord* start, size_t words); - static void fill_with_object(MemRegion region) { - fill_with_object(region.start(), region.word_size()); + static void fill_with_object(HeapWord* start, size_t words, bool zap = true); + static void fill_with_object(MemRegion region, bool zap = true) { + fill_with_object(region.start(), region.word_size(), zap); } - static void fill_with_object(HeapWord* start, HeapWord* end) { - fill_with_object(start, pointer_delta(end, start)); + static void fill_with_object(HeapWord* start, HeapWord* end, bool zap = true) { + fill_with_object(start, pointer_delta(end, start), zap); } // Some heaps may offer a contiguous region for shared non-blocking @@ -431,14 +437,25 @@ // promises to call this function on such a slow-path-allocated // object before performing initializations that have elided // store barriers. Returns new_obj, or maybe a safer copy thereof. - virtual oop defer_store_barrier(JavaThread* thread, oop new_obj); + virtual oop new_store_pre_barrier(JavaThread* thread, oop new_obj); // Answers whether an initializing store to a new object currently - // allocated at the given address doesn't need a (deferred) store + // allocated at the given address doesn't need a store // barrier. Returns "true" if it doesn't need an initializing // store barrier; answers "false" if it does. virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0; + // If a compiler is eliding store barriers for TLAB-allocated objects, + // we will be informed of a slow-path allocation by a call + // to new_store_pre_barrier() above. Such a call precedes the + // initialization of the object itself, and no post-store-barriers will + // be issued. Some heap types require that the barrier strictly follows + // the initializing stores. (This is currently implemented by deferring the + // barrier until the next slow-path allocation or gc-related safepoint.) + // This interface answers whether a particular heap type needs the card + // mark to be thus strictly sequenced after the stores. + virtual bool card_mark_must_follow_store() const = 0; + // If the CollectedHeap was asked to defer a store barrier above, // this informs it to flush such a deferred store barrier to the // remembered set. diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/includeDB_compiler2 --- a/src/share/vm/includeDB_compiler2 Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/includeDB_compiler2 Fri Jan 22 15:06:53 2010 -0800 @@ -601,6 +601,7 @@ loopTransform.cpp addnode.hpp loopTransform.cpp allocation.inline.hpp +loopTransform.cpp callnode.hpp loopTransform.cpp connode.hpp loopTransform.cpp compileLog.hpp loopTransform.cpp divnode.hpp diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/interpreter/abstractInterpreter.hpp --- a/src/share/vm/interpreter/abstractInterpreter.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/interpreter/abstractInterpreter.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 
+ * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -109,6 +109,8 @@ static void print_method_kind(MethodKind kind) PRODUCT_RETURN; + static bool can_be_compiled(methodHandle m); + // Runtime support // length = invoke bytecode length (to advance to next bytecode) diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/memory/genCollectedHeap.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -51,6 +51,8 @@ } jint GenCollectedHeap::initialize() { + CollectedHeap::pre_initialize(); + int i; _n_gens = gen_policy()->number_of_generations(); @@ -129,6 +131,7 @@ _rem_set = collector_policy()->create_rem_set(_reserved, n_covered_regions); set_barrier_set(rem_set()->bs()); + _gch = this; for (i = 0; i < _n_gens; i++) { diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/memory/genCollectedHeap.hpp --- a/src/share/vm/memory/genCollectedHeap.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/memory/genCollectedHeap.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -260,6 +260,10 @@ return true; } + virtual bool card_mark_must_follow_store() const { + return UseConcMarkSweepGC; + } + // We don't need barriers for stores to objects in the // young gen and, a fortiori, for initializing stores to // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS} diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/memory/threadLocalAllocBuffer.cpp --- a/src/share/vm/memory/threadLocalAllocBuffer.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/memory/threadLocalAllocBuffer.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -100,7 +100,7 @@ void ThreadLocalAllocBuffer::make_parsable(bool retire) { if (end() != NULL) { invariants(); - CollectedHeap::fill_with_object(top(), hard_end()); + CollectedHeap::fill_with_object(top(), hard_end(), retire); if (retire || ZeroTLAB) { // "Reset" the TLAB set_start(NULL); diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/memory/threadLocalAllocBuffer.inline.hpp --- a/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,8 +27,13 @@ HeapWord* obj = top(); if (pointer_delta(end(), obj) >= size) { // successful thread-local allocation - - DEBUG_ONLY(Copy::fill_to_words(obj, size, badHeapWordVal)); +#ifdef ASSERT + // Skip mangling the space corresponding to the object header to + // ensure that the returned space is not considered parsable by + // any concurrent GC thread. + size_t hdr_size = CollectedHeap::min_fill_size(); + Copy::fill_to_words(obj + hdr_size, size - hdr_size, badHeapWordVal); +#endif // ASSERT // This addition is safe because we know that top is // at least size below end, so the add can't wrap. 
set_top(obj + size); diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/c2_globals.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -154,6 +154,12 @@ notproduct(bool, TraceProfileTripCount, false, \ "Trace profile loop trip count information") \ \ + product(bool, UseLoopPredicate, true, \ + "Generate a predicate to select fast/slow loop versions") \ + \ + develop(bool, TraceLoopPredicate, false, \ + "Trace generation of loop predicates") \ + \ develop(bool, OptoCoalesce, true, \ "Use Conservative Copy Coalescing in the Register Allocator") \ \ diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/compile.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -932,6 +932,7 @@ _intrinsics = NULL; _macro_nodes = new GrowableArray<Node*>(comp_arena(), 8, 0, NULL); + _predicate_opaqs = new GrowableArray<Node*>(comp_arena(), 8, 0, NULL); register_library_intrinsics(); } @@ -1553,6 +1554,19 @@ } } +//---------------------cleanup_loop_predicates----------------------- +// Remove the opaque nodes that protect the predicates so that all unused +// checks and uncommon_traps will be eliminated from the ideal graph +void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) { + if (predicate_count()==0) return; + for (int i = predicate_count(); i > 0; i--) { + Node * n = predicate_opaque1_node(i-1); + assert(n->Opcode() == Op_Opaque1, "must be"); + igvn.replace_node(n, n->in(1)); + } + assert(predicate_count()==0, "should be clean!"); + igvn.optimize(); +} //------------------------------Optimize--------------------------------------- // Given a graph, optimize it. @@ -1594,7 +1608,7 @@ if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) { { TracePhase t2("idealLoop", &_t_idealLoop, true); - PhaseIdealLoop ideal_loop( igvn, true ); + PhaseIdealLoop ideal_loop( igvn, true, UseLoopPredicate); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop 1", 2); if (failing()) return; } // Loop opts pass if partial peeling occurred in previous pass if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) { TracePhase t3("idealLoop", &_t_idealLoop, true); - PhaseIdealLoop ideal_loop( igvn, false ); + PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop 2", 2); if (failing()) return; } // Loop opts pass for loop-unrolling before CCP if(major_progress() && (loop_opts_cnt > 0)) { TracePhase t4("idealLoop", &_t_idealLoop, true); - PhaseIdealLoop ideal_loop( igvn, false ); + PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop 3", 2); } @@ -1648,13 +1662,21 @@ // peeling, unrolling, etc. if(loop_opts_cnt > 0) { debug_only( int cnt = 0; ); + bool loop_predication = UseLoopPredicate; while(major_progress() && (loop_opts_cnt > 0)) { TracePhase t2("idealLoop", &_t_idealLoop, true); assert( cnt++ < 40, "infinite cycle in loop optimization" ); - PhaseIdealLoop ideal_loop( igvn, true ); + PhaseIdealLoop ideal_loop( igvn, true, loop_predication); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop iterations", 2); if (failing()) return; + // Perform loop predication optimization during first iteration after CCP. + // After that switch it off and clean up unused loop predicates. 
+ if (loop_predication) { + loop_predication = false; + cleanup_loop_predicates(igvn); + if (failing()) return; + } } } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/compile.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -38,6 +38,7 @@ class OptoReg; class PhaseCFG; class PhaseGVN; +class PhaseIterGVN; class PhaseRegAlloc; class PhaseCCP; class PhaseCCP_DCE; @@ -172,6 +173,7 @@ const char* _failure_reason; // for record_failure/failing pattern GrowableArray<CallGenerator*>* _intrinsics; // List of intrinsics. GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching. + GrowableArray<Node*>* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. ConnectionGraph* _congraph; #ifndef PRODUCT IdealGraphPrinter* _printer; #endif @@ -351,7 +353,9 @@ } int macro_count() { return _macro_nodes->length(); } + int predicate_count() { return _predicate_opaqs->length();} Node* macro_node(int idx) { return _macro_nodes->at(idx); } + Node* predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);} ConnectionGraph* congraph() { return _congraph;} void add_macro_node(Node * n) { //assert(n->is_macro(), "must be a macro node"); @@ -363,7 +367,19 @@ // that the node is in the array before attempting to remove it if (_macro_nodes->contains(n)) _macro_nodes->remove(n); + // remove from _predicate_opaqs list also if it is there + if (predicate_count() > 0 && _predicate_opaqs->contains(n)){ + _predicate_opaqs->remove(n); + } } + void add_predicate_opaq(Node * n) { + assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1"); + assert(_macro_nodes->contains(n), "should have already been in macro list"); + _predicate_opaqs->append(n); + } + // remove the opaque nodes that protect the predicates so that the unused checks and + // uncommon traps will be eliminated from the graph. + void cleanup_loop_predicates(PhaseIterGVN &igvn); // Compilation environment. Arena* comp_arena() { return &_comp_arena; } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/graphKit.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -3259,9 +3259,10 @@ if (use_ReduceInitialCardMarks() && obj == just_allocated_object(control())) { // We can skip marks on a freshly-allocated object in Eden. - // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp. - // That routine informs GC to take appropriate compensating steps - // so as to make this card-mark elision safe. + // Keep this code in sync with new_store_pre_barrier() in runtime.cpp. + // That routine informs GC to take appropriate compensating steps, + // upon a slow-path allocation, so as to make this card-mark + // elision safe. 
return; } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/loopTransform.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -549,6 +549,10 @@ // Comparing trip+off vs limit Node *bol = iff->in(1); if( bol->req() != 2 ) continue; // dead constant test + if (!bol->is_Bool()) { + assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); + continue; + } Node *cmp = bol->in(1); Node *rc_exp = cmp->in(1); @@ -875,7 +879,7 @@ //------------------------------is_invariant----------------------------- // Return true if n is invariant bool IdealLoopTree::is_invariant(Node* n) const { - Node *n_c = _phase->get_ctrl(n); + Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; if (n_c->is_top()) return false; return !is_member(_phase->get_loop(n_c)); } @@ -1594,7 +1598,7 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { // Check and remove empty loops (spam micro-benchmarks) if( policy_do_remove_empty_loop(phase) ) - return true; // Here we removed an empty loop + return true; // Here we removed an empty loop bool should_peel = policy_peeling(phase); // Should we peel? @@ -1688,8 +1692,8 @@ // an even number of trips). If we are peeling, we might enable some RCE // and we'd rather unroll the post-RCE'd loop SO... do not unroll if // peeling. - if( should_unroll && !should_peel ) - phase->do_unroll(this,old_new, true); + if( should_unroll && !should_peel ) + phase->do_unroll(this,old_new, true); // Adjust the pre-loop limits to align the main body // iterations. @@ -1731,9 +1735,9 @@ _allow_optimizations && !tail()->is_top() ) { // Also ignore the occasional dead backedge if (!_has_call) { - if (!iteration_split_impl( phase, old_new )) { - return false; - } + if (!iteration_split_impl( phase, old_new )) { + return false; + } } else if (policy_unswitching(phase)) { phase->do_unswitching(this, old_new); } @@ -1746,3 +1750,576 @@ return false; return true; } + +//-------------------------------is_uncommon_trap_proj---------------------------- +// Return true if proj is of the form "proj->[region->..]call_uct" +bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate) { + int path_limit = 10; + assert(proj, "invalid argument"); + Node* out = proj; + for (int ct = 0; ct < path_limit; ct++) { + out = out->unique_ctrl_out(); + if (out == NULL || out->is_Root() || out->is_Start()) + return false; + if (out->is_CallStaticJava()) { + int req = out->as_CallStaticJava()->uncommon_trap_request(); + if (req != 0) { + Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(req); + if (!must_reason_predicate || reason == Deoptimization::Reason_predicate){ + return true; + } + } + return false; // don't look further after a call + } + } + return false; +} + +//-------------------------------is_uncommon_trap_if_pattern------------------------- +// Return true for "if(test)-> proj -> ... 
+// | +// V +// other_proj->[region->..]call_uct" +// +// "must_reason_predicate" means the uct reason must be Reason_predicate +bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, bool must_reason_predicate) { + Node *in0 = proj->in(0); + if (!in0->is_If()) return false; + IfNode* iff = in0->as_If(); + + // we need "If(Conv2B(Opaque1(...)))" pattern for must_reason_predicate + if (must_reason_predicate) { + if (iff->in(1)->Opcode() != Op_Conv2B || + iff->in(1)->in(1)->Opcode() != Op_Opaque1) { + return false; + } + } + + ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj(); + return is_uncommon_trap_proj(other_proj, must_reason_predicate); +} + +//------------------------------create_new_if_for_predicate------------------------ +// create a new if above the uct_if_pattern for the predicate to be promoted. +// +// before after +// ---------- ---------- +// ctrl ctrl +// | | +// | | +// v v +// iff new_iff +// / \ / \ +// / \ / \ +// v v v v +// uncommon_proj cont_proj if_uct if_cont +// \ | | | | +// \ | | | | +// v v v | v +// rgn loop | iff +// | | / \ +// | | / \ +// v | v v +// uncommon_trap | uncommon_proj cont_proj +// \ \ | | +// \ \ | | +// v v v v +// rgn loop +// | +// | +// v +// uncommon_trap +// +// +// We will create a region to guard the uct call if there is none there. +// The true projection (if_cont) of the new_iff is returned. +ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj) { + assert(is_uncommon_trap_if_pattern(cont_proj, true), "must be a uct if pattern!"); + IfNode* iff = cont_proj->in(0)->as_If(); + + ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); + Node *rgn = uncommon_proj->unique_ctrl_out(); + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); + + if (!rgn->is_Region()) { // create a region to guard the call + assert(rgn->is_Call(), "must be call uct"); + CallNode* call = rgn->as_Call(); + rgn = new (C, 1) RegionNode(1); + _igvn.set_type(rgn, rgn->bottom_type()); + rgn->add_req(uncommon_proj); + set_idom(rgn, idom(uncommon_proj), dom_depth(uncommon_proj)+1); + _igvn.hash_delete(call); + call->set_req(0, rgn); + } + + // Create new_iff + uint iffdd = dom_depth(iff); + IdealLoopTree* lp = get_loop(iff); + IfNode *new_iff = new (C, 2) IfNode(iff->in(0), NULL, iff->_prob, iff->_fcnt); + register_node(new_iff, lp, idom(iff), iffdd); + Node *if_cont = new (C, 1) IfTrueNode(new_iff); + Node *if_uct = new (C, 1) IfFalseNode(new_iff); + if (cont_proj->is_IfFalse()) { + // Swap + Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; + } + register_node(if_cont, lp, new_iff, iffdd); + register_node(if_uct, get_loop(rgn), new_iff, iffdd); + + // if_cont to iff + _igvn.hash_delete(iff); + iff->set_req(0, if_cont); + set_idom(iff, if_cont, dom_depth(iff)); + + // if_uct to rgn + _igvn.hash_delete(rgn); + rgn->add_req(if_uct); + Node* ridom = idom(rgn); + Node* nrdom = dom_lca(ridom, new_iff); + set_idom(rgn, nrdom, dom_depth(rgn)); + + // rgn must have no phis + assert(!rgn->as_Region()->has_phi(), "region must have no phis"); + + return if_cont->as_Proj(); +} + +//------------------------------find_predicate_insertion_point-------------------------- +// Find a good location to insert a predicate +ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c) { + if (start_c == C->root() || !start_c->is_Proj()) + return NULL; + if (is_uncommon_trap_if_pattern(start_c->as_Proj(), true/*Reason_Predicate*/)) { + return start_c->as_Proj(); + } + return NULL; +} + 
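For orientation, a minimal source-level sketch (illustrative only, not part of this changeset) of the transformation this machinery enables; deoptimize() below is a hypothetical stand-in for reaching the uncommon trap:

    void deoptimize(); // hypothetical stand-in for the uncommon trap path

    // Before predication: the check runs on every iteration.
    int sum_before(const int* a, int a_len, int n) {
      int sum = 0;
      for (int i = 0; i < n; i++) {
        if (!((unsigned)i < (unsigned)a_len)) deoptimize(); // per-iteration range check
        sum += a[i];
      }
      return sum;
    }

    // After predication: one hoisted predicate (a Reason_predicate trap)
    // guards the loop and the body runs check-free. Assumes the counted-loop
    // guard has already established that the loop is entered (n >= 1).
    int sum_after(const int* a, int a_len, int n) {
      if (!((unsigned)(n - 1) < (unsigned)a_len)) deoptimize(); // hoisted predicate
      int sum = 0;
      for (int i = 0; i < n; i++) {
        sum += a[i];
      }
      return sum;
    }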
+//------------------------------Invariance----------------------------------- +// Helper class for loop_predication_impl to compute invariance on the fly and +// clone invariants. +class Invariance : public StackObj { + VectorSet _visited, _invariant; + Node_Stack _stack; + VectorSet _clone_visited; + Node_List _old_new; // map of old to new (clone) + IdealLoopTree* _lpt; + PhaseIdealLoop* _phase; + + // Helper function to set up the invariance for invariance computation + // If n is a known invariant, set up directly. Otherwise, look up the + // possibility to push n onto the stack for further processing. + void visit(Node* use, Node* n) { + if (_lpt->is_invariant(n)) { // known invariant + _invariant.set(n->_idx); + } else if (!n->is_CFG()) { + Node *n_ctrl = _phase->ctrl_or_self(n); + Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG + if (_phase->is_dominator(n_ctrl, u_ctrl)) { + _stack.push(n, n->in(0) == NULL ? 1 : 0); + } + } + } + + // Compute invariance for "n" and (possibly) all its inputs recursively + // on the fly + void compute_invariance(Node* n) { + assert(_visited.test(n->_idx), "must be"); + visit(n, n); + while (_stack.is_nonempty()) { + Node* n = _stack.node(); + uint idx = _stack.index(); + if (idx == n->req()) { // all inputs are processed + _stack.pop(); + // n is invariant if its inputs are all invariant + bool all_inputs_invariant = true; + for (uint i = 0; i < n->req(); i++) { + Node* in = n->in(i); + if (in == NULL) continue; + assert(_visited.test(in->_idx), "must have visited input"); + if (!_invariant.test(in->_idx)) { // bad guy + all_inputs_invariant = false; + break; + } + } + if (all_inputs_invariant) { + _invariant.set(n->_idx); // I am an invariant too + } + } else { // process next input + _stack.set_index(idx + 1); + Node* m = n->in(idx); + if (m != NULL && !_visited.test_set(m->_idx)) { + visit(n, m); + } + } + } + } + + // Helper function to set up _old_new map for clone_nodes. + // If n is a known invariant, set up directly ("clone" of n == n). + // Otherwise, push n onto the stack for real cloning. + void clone_visit(Node* n) { + assert(_invariant.test(n->_idx), "must be invariant"); + if (_lpt->is_invariant(n)) { // known invariant + _old_new.map(n->_idx, n); + } else { // to be cloned + assert (!n->is_CFG(), "should not see CFG here"); + _stack.push(n, n->in(0) == NULL ? 1 : 0); + } + } + + // Clone "n" and (possibly) all its inputs recursively + void clone_nodes(Node* n, Node* ctrl) { + clone_visit(n); + while (_stack.is_nonempty()) { + Node* n = _stack.node(); + uint idx = _stack.index(); + if (idx == n->req()) { // all inputs processed, clone n! 
+ _stack.pop(); + // clone invariant node + Node* n_cl = n->clone(); + _old_new.map(n->_idx, n_cl); + _phase->register_new_node(n_cl, ctrl); + for (uint i = 0; i < n->req(); i++) { + Node* in = n_cl->in(i); + if (in == NULL) continue; + n_cl->set_req(i, _old_new[in->_idx]); + } + } else { // process next input + _stack.set_index(idx + 1); + Node* m = n->in(idx); + if (m != NULL && !_clone_visited.test_set(m->_idx)) { + clone_visit(m); // visit the input + } + } + } + } + + public: + Invariance(Arena* area, IdealLoopTree* lpt) : + _lpt(lpt), _phase(lpt->_phase), + _visited(area), _invariant(area), _stack(area, 10 /* guess */), + _clone_visited(area), _old_new(area) + {} + + // Map old to n for invariance computation and cloning + void map_ctrl(Node* old, Node* n) { + assert(old->is_CFG() && n->is_CFG(), "must be"); + _old_new.map(old->_idx, n); // "clone" of old is n + _invariant.set(old->_idx); // old is invariant + _clone_visited.set(old->_idx); + } + + // Driver function to compute invariance + bool is_invariant(Node* n) { + if (!_visited.test_set(n->_idx)) + compute_invariance(n); + return (_invariant.test(n->_idx) != 0); + } + + // Driver function to clone an invariant + Node* clone(Node* n, Node* ctrl) { + assert(ctrl->is_CFG(), "must be"); + assert(_invariant.test(n->_idx), "must be an invariant"); + if (!_clone_visited.test(n->_idx)) + clone_nodes(n, ctrl); + return _old_new[n->_idx]; + } +}; + +//------------------------------is_range_check_if ----------------------------------- +// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format +// Note: this function is particularly designed for loop predication. We require load_range +// and offset to be loop invariant, computed on the fly by "invar". +bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { + if (!is_loop_exit(iff)) { + return false; + } + if (!iff->in(1)->is_Bool()) { + return false; + } + const BoolNode *bol = iff->in(1)->as_Bool(); + if (bol->_test._test != BoolTest::lt) { + return false; + } + if (!bol->in(1)->is_Cmp()) { + return false; + } + const CmpNode *cmp = bol->in(1)->as_Cmp(); + if (cmp->Opcode() != Op_CmpU ) { + return false; + } + if (cmp->in(2)->Opcode() != Op_LoadRange) { + return false; + } + LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2); + if (!invar.is_invariant(lr)) { // loadRange must be invariant + return false; + } + Node *iv = _head->as_CountedLoop()->phi(); + int scale = 0; + Node *offset = NULL; + if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { + return false; + } + if(offset && !invar.is_invariant(offset)) { // offset must be invariant + return false; + } + return true; +} + +//------------------------------rc_predicate----------------------------------- +// Create a range check predicate +// +// for (i = init; i < limit; i += stride) { +// a[scale*i+offset] +// } +// +// Compute max(scale*i + offset) for init <= i < limit and build the predicate +// as "max(scale*i + offset) u< a.length". 
+// +// There are two cases for max(scale*i + offset): +// (1) stride*scale > 0 +// max(scale*i + offset) = scale*(limit-stride) + offset +// (2) stride*scale < 0 +// max(scale*i + offset) = scale*init + offset +BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, + int scale, Node* offset, + Node* init, Node* limit, Node* stride, + Node* range) { + Node* max_idx_expr = init; + int stride_con = stride->get_int(); + if ((stride_con > 0) == (scale > 0)) { + max_idx_expr = new (C, 3) SubINode(limit, stride); + register_new_node(max_idx_expr, ctrl); + } + + if (scale != 1) { + ConNode* con_scale = _igvn.intcon(scale); + max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); + register_new_node(max_idx_expr, ctrl); + } + + if (offset && (!offset->is_Con() || offset->get_int() != 0)){ + max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); + register_new_node(max_idx_expr, ctrl); + } + + CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); + register_new_node(cmp, ctrl); + BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); + register_new_node(bol, ctrl); + return bol; +} + +//------------------------------ loop_predication_impl-------------------------- +// Insert loop predicates for null checks and range checks +bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { + if (!UseLoopPredicate) return false; + + // Too many traps seen? + bool tmt = C->too_many_traps(C->method(), 0, Deoptimization::Reason_predicate); + int tc = C->trap_count(Deoptimization::Reason_predicate); + if (tmt || tc > 0) { + if (TraceLoopPredicate) { + tty->print_cr("too many predicate traps: %d", tc); + C->method()->print(); // which method has too many predicate traps + tty->print_cr(""); + } + return false; + } + + CountedLoopNode *cl = NULL; + if (loop->_head->is_CountedLoop()) { + cl = loop->_head->as_CountedLoop(); + // do nothing for iteration-split loops + if(!cl->is_normal_loop()) return false; + } + + LoopNode *lpn = loop->_head->as_Loop(); + Node* entry = lpn->in(LoopNode::EntryControl); + + ProjNode *predicate_proj = find_predicate_insertion_point(entry); + if (!predicate_proj){ +#ifndef PRODUCT + if (TraceLoopPredicate) { + tty->print("missing predicate:"); + loop->dump_head(); + } +#endif + return false; + } + + ConNode* zero = _igvn.intcon(0); + set_ctrl(zero, C->root()); + Node *cond_false = new (C, 2) Conv2BNode(zero); + register_new_node(cond_false, C->root()); + ConNode* one = _igvn.intcon(1); + set_ctrl(one, C->root()); + Node *cond_true = new (C, 2) Conv2BNode(one); + register_new_node(cond_true, C->root()); + + ResourceArea *area = Thread::current()->resource_area(); + Invariance invar(area, loop); + + // Create list of if-projs such that a newer proj dominates all older + // projs in the list, and they all dominate loop->tail() + Node_List if_proj_list(area); + LoopNode *head = loop->_head->as_Loop(); + Node *current_proj = loop->tail(); // start from tail + while ( current_proj != head ) { + if (loop == get_loop(current_proj) && // still in the loop ? + current_proj->is_Proj() && // is a projection ? + current_proj->in(0)->Opcode() == Op_If) { // is an if projection ? 
+      if_proj_list.push(current_proj);
+    }
+    current_proj = idom(current_proj);
+  }
+
+  bool hoisted = false; // true if at least one proj is promoted
+  while (if_proj_list.size() > 0) {
+    // The following are changed to non-null when a predicate can be hoisted
+    ProjNode* new_predicate_proj = NULL;
+    BoolNode* new_predicate_bol  = NULL;
+
+    ProjNode* proj = if_proj_list.pop()->as_Proj();
+    IfNode*   iff  = proj->in(0)->as_If();
+
+    if (!is_uncommon_trap_if_pattern(proj)) {
+      if (loop->is_loop_exit(iff)) {
+        // stop processing the remaining projs in the list because whether they
+        // execute depends on the condition of "iff" (iff->in(1)).
+        break;
+      } else {
+        // Both arms are inside the loop. There are two cases:
+        // (1) there is one backward branch. In this case, any remaining proj
+        //     in the if_proj list post-dominates "iff". So the condition of "iff"
+        //     does not directly determine whether the remaining projs execute,
+        //     and we can safely continue.
+        // (2) both arms are forward branches, i.e. a diamond shape. In this case,
+        //     "proj" does not dominate loop->tail(), so it cannot be in the
+        //     if_proj list.
+        continue;
+      }
+    }
+
+    Node* test = iff->in(1);
+    if (!test->is_Bool()) { // Conv2B, ...
+      continue;
+    }
+    BoolNode* bol = test->as_Bool();
+    if (invar.is_invariant(bol)) {
+      // Invariant test
+      new_predicate_proj = create_new_if_for_predicate(predicate_proj);
+      Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
+      new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
+      if (TraceLoopPredicate) tty->print("invariant");
+    } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
+      // Range check (only for counted loops)
+      new_predicate_proj = create_new_if_for_predicate(predicate_proj);
+      Node *ctrl = new_predicate_proj->in(0)->as_If()->in(0);
+      const Node* cmp = bol->in(1)->as_Cmp();
+      Node* idx = cmp->in(1);
+      assert(!invar.is_invariant(idx), "index is variant");
+      assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be");
+      LoadRangeNode* ld_rng = (LoadRangeNode*)cmp->in(2); // LoadRangeNode
+      assert(invar.is_invariant(ld_rng), "load range must be invariant");
+      ld_rng = (LoadRangeNode*)invar.clone(ld_rng, ctrl);
+      int scale = 1;
+      Node* offset = zero;
+      bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
+      assert(ok, "must be index expression");
+      if (offset && offset != zero) {
+        assert(invar.is_invariant(offset), "offset must be loop invariant");
+        offset = invar.clone(offset, ctrl);
+      }
+      Node* init   = cl->init_trip();
+      Node* limit  = cl->limit();
+      Node* stride = cl->stride();
+      new_predicate_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng);
+      if (TraceLoopPredicate) tty->print("range check");
+    }
+
+    if (new_predicate_proj == NULL) {
+      // The other proj of the "iff" is an uncommon trap projection, and we can
+      // assume that proj will not be executed ("executed" meaning the uncommon
+      // trap is raised).
+      continue;
+    } else {
+      // Success - attach the condition (new_predicate_bol) to the predicate if
+      invar.map_ctrl(proj, new_predicate_proj); // so that the invariance test stays accurate
+      IfNode* new_iff = new_predicate_proj->in(0)->as_If();
+
+      // Negate test if necessary
+      if (proj->_con != predicate_proj->_con) {
+        new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
+        register_new_node(new_predicate_bol, new_iff->in(0));
+        if (TraceLoopPredicate) tty->print_cr(" if negated: %d", iff->_idx);
+      } else {
+        if (TraceLoopPredicate) tty->print_cr(" if: %d", iff->_idx);
+      }
+
+      _igvn.hash_delete(new_iff);
+      new_iff->set_req(1, new_predicate_bol);
+
+      _igvn.hash_delete(iff);
+      iff->set_req(1, proj->is_IfFalse() ? cond_false : cond_true);
+
+      Node* ctrl = new_predicate_proj; // new control
+      ProjNode* dp = proj;             // old control
+      assert(get_loop(dp) == loop, "guaranteed at the time of collecting proj");
+      // Find nodes (that depend only on the test) off the surviving projection;
+      // move them outside the loop with the control of proj_clone
+      for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
+        Node* cd = dp->fast_out(i); // Control-dependent node
+        if (cd->depends_only_on_test()) {
+          assert(cd->in(0) == dp, "");
+          _igvn.hash_delete(cd);
+          cd->set_req(0, ctrl); // ctrl, not NULL
+          set_early_ctrl(cd);
+          _igvn._worklist.push(cd);
+          IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
+          if (new_loop != loop) {
+            if (!loop->_child)     loop->_body.yank(cd);
+            if (!new_loop->_child) new_loop->_body.push(cd);
+          }
+          --i;
+          --imax;
+        }
+      }
+
+      hoisted = true;
+      C->set_major_progress();
+    }
+  } // end while
+
+#ifndef PRODUCT
+  // Report that loop predication was actually performed for this loop
+  if (TraceLoopPredicate && hoisted) {
+    tty->print("Loop Predication Performed:");
+    loop->dump_head();
+  }
+#endif
+
+  return hoisted;
+}
+
+//------------------------------loop_predication--------------------------------
+// Driver routine for the loop predication optimization
+bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) {
+  bool hoisted = false;
+  // Recursively promote predicates
+  if (_child) {
+    hoisted = _child->loop_predication( phase);
+  }
+
+  // self
+  if (!_irreducible && !tail()->is_top()) {
+    hoisted |= phase->loop_predication_impl(this);
+  }
+
+  if (_next) { // sibling
+    hoisted |= _next->loop_predication( phase);
+  }
+
+  return hoisted;
+}
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/loopnode.cpp
--- a/src/share/vm/opto/loopnode.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/loopnode.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1420,11 +1420,57 @@
   }
 }
 
+//---------------------collect_potentially_useful_predicates-----------------------
+// Helper function to collect potentially useful predicates, to prevent them from
+// being eliminated by PhaseIdealLoop::eliminate_useless_predicates
+void PhaseIdealLoop::collect_potentially_useful_predicates(
+    IdealLoopTree * loop, Unique_Node_List &useful_predicates) {
+  if (loop->_child) { // child
+    collect_potentially_useful_predicates(loop->_child, useful_predicates);
+  }
+
+  // self (only loops to which loop predication can be applied may use their predicates)
+  if (loop->_head->is_Loop() &&
+      !loop->_irreducible &&
+      !loop->tail()->is_top()) {
+    LoopNode *lpn = loop->_head->as_Loop();
+    Node* entry = lpn->in(LoopNode::EntryControl);
+    ProjNode *predicate_proj = find_predicate_insertion_point(entry);
+    if (predicate_proj != NULL) { // right pattern that can be used by loop predication
+      assert(entry->in(0)->in(1)->in(1)->Opcode() == Op_Opaque1, "must be");
+      useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
+    }
+  }
+
+  if (loop->_next) { // sibling
+    collect_potentially_useful_predicates(loop->_next, useful_predicates);
+  }
+}
+
+//------------------------eliminate_useless_predicates-----------------------------
+// Eliminate all inserted predicates that loop predication cannot use.
+void PhaseIdealLoop::eliminate_useless_predicates() {
+  if (C->predicate_count() == 0) return; // no predicates left
+
+  Unique_Node_List useful_predicates; // to store the useful predicates
+  if (C->has_loops()) {
+    collect_potentially_useful_predicates(_ltree_root->_child, useful_predicates);
+  }
+
+  for (int i = C->predicate_count(); i > 0; i--) {
+    Node * n = C->predicate_opaque1_node(i - 1);
+    assert(n->Opcode() == Op_Opaque1, "must be");
+    if (!useful_predicates.member(n)) { // not in the useful list
+      _igvn.replace_node(n, n->in(1));
+    }
+  }
+}
+
 //=============================================================================
 //----------------------------build_and_optimize-------------------------------
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
 // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
-void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) {
+void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
   int old_progress = C->major_progress();
 
   // Reset major-progress flag for the driver's heuristics
@@ -1577,6 +1623,12 @@
     return;
   }
 
+  // Some parser-inserted loop predicates could never be used by loop
+  // predication. Eliminate them before loop optimization.
+  if (UseLoopPredicate) {
+    eliminate_useless_predicates();
+  }
+
   // clear out the dead code
   while(_deadlist.size()) {
     _igvn.remove_globally_dead_node(_deadlist.pop());
@@ -1603,7 +1655,7 @@
   // Because RCE opportunities can be masked by split_thru_phi,
   // look for RCE candidates and inhibit split_thru_phi
   // on just their loop-phi's for this pass of loop opts
-  if( SplitIfBlocks && do_split_ifs ) {
+  if (SplitIfBlocks && do_split_ifs) {
     if (lpt->policy_range_check(this)) {
       lpt->_rce_candidate = 1; // = true
     }
  }
@@ -1619,12 +1671,17 @@
     NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
   }
 
+  // Perform loop predication before iteration splitting
+  if (do_loop_pred && C->has_loops() && !C->major_progress()) {
+    _ltree_root->_child->loop_predication(this);
+  }
+
   // Perform iteration-splitting on inner loops.  Split iterations to avoid
   // range checks or one-shot null checks.
 
   // If split-if's didn't hack the graph too bad (no CFG changes)
   // then do loop opts.
-  if( C->has_loops() && !C->major_progress() ) {
+  if (C->has_loops() && !C->major_progress()) {
     memset( worklist.adr(), 0, worklist.Size()*sizeof(Node*) );
     _ltree_root->_child->iteration_split( this, worklist );
     // No verify after peeling!  GCM has hoisted code out of the loop.
@@ -1636,7 +1693,7 @@
   // Do verify graph edges in any case
   NOT_PRODUCT( C->verify_graph_edges(); );
 
-  if( !do_split_ifs ) {
+  if (!do_split_ifs) {
     // We saw major progress in Split-If to get here.  We forced a
     // pass with unrolling and not split-if, however more split-if's
     // might make progress.  If the unrolling didn't make progress
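Why replacing a useless Opaque1 with its input is enough (a sketch of the follow-on IGVN cleanup, not code from the patch): the parser-inserted predicate has the shape If(Conv2B(Opaque1(ConI(1)))), so after replace_node the chain folds away on its own,

    Opaque1(ConI(1))  -->  ConI(1)        // replace_node above
    Conv2B(ConI(1))   -->  ConI(1)        // constant-folds
    If(constant 1)    -->  always taken   // IfTrue kept; the IfFalse path
                                          // into the uncommon trap dies

leaving no trace of the unused predicate in the generated code.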
@@ -2763,6 +2820,22 @@
   Node *legal = LCA;            // Walk 'legal' up the IDOM chain
   Node *least = legal;          // Best legal position so far
   while( early != legal ) {     // While not at earliest legal
+#ifdef ASSERT
+    if (legal->is_Start() && !early->is_Root()) {
+      // Bad graph. Print idom path and fail.
+      tty->print_cr( "Bad graph detected in build_loop_late");
+      tty->print("n: "); n->dump(); tty->cr();
+      tty->print("early: "); early->dump(); tty->cr();
+      int ct = 0;
+      Node *dbg_legal = LCA;
+      while(!dbg_legal->is_Start() && ct < 100) {
+        tty->print("idom[%d] ", ct); dbg_legal->dump(); tty->cr();
+        ct++;
+        dbg_legal = idom(dbg_legal);
+      }
+      assert(false, "Bad graph detected in build_loop_late");
+    }
+#endif
     // Find least loop nesting depth
     legal = idom(legal);        // Bump up the IDOM tree
     // Check for lower nesting depth
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/loopnode.hpp
--- a/src/share/vm/opto/loopnode.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/loopnode.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -30,6 +30,7 @@
 class Node;
 class PhaseIdealLoop;
 class VectorSet;
+class Invariance;
 struct small_cache;
 
 //
@@ -325,6 +326,10 @@
   // Returns TRUE if loop tree is structurally changed.
   bool beautify_loops( PhaseIdealLoop *phase );
 
+  // Perform optimization to use the loop predicates for null checks and range checks.
+  // Applies to any loop level (not just the innermost one)
+  bool loop_predication( PhaseIdealLoop *phase);
+
   // Perform iteration-splitting on inner loops.  Split iterations to
   // avoid range checks or one-shot null checks.  Returns false if the
   // current round of loop opts should stop.
@@ -395,6 +400,9 @@
   // into longer memory ops, we may want to increase alignment.
   bool policy_align( PhaseIdealLoop *phase ) const;
 
+  // Return TRUE if "iff" is a range check.
+  bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;
+
   // Compute loop trip count from profile data
   void compute_profile_trip_cnt( PhaseIdealLoop *phase );
 
@@ -521,9 +529,6 @@
   }
   Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag );
 
-  // true if CFG node d dominates CFG node n
-  bool is_dominator(Node *d, Node *n);
-
   // Helper function for directing control inputs away from CFG split
   // points.
   Node *find_non_split_ctrl( Node *ctrl ) const {
@@ -572,6 +577,17 @@
     assert(n == find_non_split_ctrl(n), "must return legal ctrl" );
     return n;
   }
+  // true if CFG node d dominates CFG node n
+  bool is_dominator(Node *d, Node *n);
+  // return get_ctrl for a data node and self(n) for a CFG node
+  Node* ctrl_or_self(Node* n) {
+    if (has_ctrl(n))
+      return get_ctrl(n);
+    else {
+      assert (n->is_CFG(), "must be a CFG node");
+      return n;
+    }
+  }
 
 private:
   Node *get_ctrl_no_update( Node *i ) const {
@@ -600,7 +616,7 @@
   // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms.  Replace
   // the 'old_node' with 'new_node'.  Kill old-node.  Add a reference
   // from old_node to new_node to support the lazy update.  Reference
-  // replaces loop reference, since that is not neede for dead node.
+  // replaces loop reference, since that is not needed for dead node.
 public:
   void lazy_update( Node *old_node, Node *new_node ) {
     assert( old_node != new_node, "no cycles please" );
@@ -679,11 +695,11 @@
     _dom_lca_tags(C->comp_arena()),
     _verify_me(NULL),
     _verify_only(true) {
-    build_and_optimize(false);
+    build_and_optimize(false, false);
   }
 
   // build the loop tree and perform any requested optimizations
-  void build_and_optimize(bool do_split_if);
+  void build_and_optimize(bool do_split_if, bool do_loop_pred);
 
 public:
   // Dominators for the sea of nodes
@@ -694,13 +710,13 @@
   Node *dom_lca_internal( Node *n1, Node *n2 ) const;
 
   // Compute the Ideal Node to Loop mapping
-  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) :
+  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool do_loop_pred) :
     PhaseTransform(Ideal_Loop),
     _igvn(igvn),
     _dom_lca_tags(C->comp_arena()),
     _verify_me(NULL),
     _verify_only(false) {
-    build_and_optimize(do_split_ifs);
+    build_and_optimize(do_split_ifs, do_loop_pred);
   }
 
   // Verify that verify_me made the same decisions as a fresh run.
@@ -710,7 +726,7 @@
     _dom_lca_tags(C->comp_arena()),
     _verify_me(verify_me),
     _verify_only(false) {
-    build_and_optimize(false);
+    build_and_optimize(false, false);
   }
 
   // Build and verify the loop tree without modifying the graph.  This
@@ -790,6 +806,30 @@
   // Return true if exp is a scaled induction var plus (or minus) constant
   bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0);
 
+  // Return true if proj is for "proj->[region->..]call_uct"
+  bool is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate = false);
+  // Return true for "if(test)-> proj -> ...
+  //                                    |
+  //                                    V
+  //                               other_proj->[region->..]call_uct"
+  bool is_uncommon_trap_if_pattern(ProjNode* proj, bool must_reason_predicate = false);
+  // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted
+  ProjNode* create_new_if_for_predicate(ProjNode* cont_proj);
+  // Find a good location to insert a predicate
+  ProjNode* find_predicate_insertion_point(Node* start_c);
+  // Construct a range check for a predicate if
+  BoolNode* rc_predicate(Node* ctrl,
+                         int scale, Node* offset,
+                         Node* init, Node* limit, Node* stride,
+                         Node* range);
+
+  // Implementation of the loop predication to promote checks outside the loop
+  bool loop_predication_impl(IdealLoopTree *loop);
+
+  // Helper function to collect predicates, used to eliminate the useless ones
+  void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
+  void eliminate_useless_predicates();
+
   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
   void do_range_check( IdealLoopTree *loop, Node_List &old_new );
@@ -906,7 +946,6 @@
   const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
 
   // Helper functions
-  void register_new_node( Node *n, Node *blk );
   Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
   Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
   void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
@@ -918,6 +957,7 @@
 public:
   void set_created_loop_node() { _created_loop_node = true; }
   bool created_loop_node()     { return _created_loop_node; }
+  void register_new_node( Node *n, Node *blk );
 
 #ifndef PRODUCT
   void dump( ) const;
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/parse.hpp
--- a/src/share/vm/opto/parse.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/parse.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -430,6 +430,11 @@
     }
   }
 
+  // Return true if the parser should add a loop predicate
+  bool should_add_predicate(int target_bci);
+  // Insert a loop predicate into the graph
+  void add_predicate();
+
   // Note:  Intrinsic generation routines may be found in library_call.cpp.
 
   // Helper function to setup Ideal Call nodes
@@ -491,7 +496,7 @@
   void do_ifnull(BoolTest::mask btest, Node* c);
   void do_if(BoolTest::mask btest, Node* c);
-  void repush_if_args();
+  int repush_if_args();
   void adjust_map_after_if(BoolTest::mask btest, Node* c, float prob, Block* path, Block* other_path);
   IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask);
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/parse1.cpp
--- a/src/share/vm/opto/parse1.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/parse1.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1383,6 +1383,10 @@
     set_parse_bci(iter().cur_bci());
 
     if (bci() == block()->limit()) {
+      // insert a predicate if this block falls through to a loop head block
+      if (should_add_predicate(bci())) {
+        add_predicate();
+      }
       // Do not walk into the next block until directed by do_all_blocks.
       merge(bci());
       break;
@@ -2083,6 +2087,37 @@
   }
 }
 
+//------------------------------should_add_predicate--------------------------
+bool Parse::should_add_predicate(int target_bci) {
+  if (!UseLoopPredicate) return false;
+  Block* target = successor_for_bci(target_bci);
+  if (target != NULL &&
+      target->is_loop_head() &&
+      block()->rpo() < target->rpo()) {
+    return true;
+  }
+  return false;
+}
+
+//------------------------------add_predicate---------------------------------
+void Parse::add_predicate() {
+  assert(UseLoopPredicate, "use only for loop predicate");
+  Node *cont    = _gvn.intcon(1);
+  Node* opq     = _gvn.transform(new (C, 2) Opaque1Node(C, cont));
+  Node *bol     = _gvn.transform(new (C, 2) Conv2BNode(opq));
+  IfNode* iff   = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
+  Node* iffalse = _gvn.transform(new (C, 1) IfFalseNode(iff));
+  C->add_predicate_opaq(opq);
+  {
+    PreserveJVMState pjvms(this);
+    set_control(iffalse);
+    uncommon_trap(Deoptimization::Reason_predicate,
+                  Deoptimization::Action_maybe_recompile);
+  }
+  Node* iftrue = _gvn.transform(new (C, 1) IfTrueNode(iff));
+  set_control(iftrue);
+}
+
 #ifndef PRODUCT
 //------------------------show_parse_info--------------------------------------
 void Parse::show_parse_info() {
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/parse2.cpp
--- a/src/share/vm/opto/parse2.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/parse2.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -278,6 +278,11 @@
   if (len < 1) {
     // If this is a backward branch, add safepoint
     maybe_add_safepoint(default_dest);
+    if (should_add_predicate(default_dest)) {
+      _sp += 1; // set original stack for use by uncommon_trap
+      add_predicate();
+      _sp -= 1;
+    }
     merge(default_dest);
     return;
   }
@@ -324,6 +329,11 @@
   if (len < 1) {
     // If this is a backward branch, add safepoint
     maybe_add_safepoint(default_dest);
+    if (should_add_predicate(default_dest)) {
+      _sp += 1; // set original stack for use by uncommon_trap
+      add_predicate();
+      _sp -= 1;
+    }
     merge(default_dest);
     return;
   }
@@ -731,6 +741,9 @@
   push(_gvn.makecon(ret_addr));
 
   // Flow to the jsr.
+  if (should_add_predicate(jsr_bci)) {
+    add_predicate();
+  }
   merge(jsr_bci);
 }
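For orientation, here is the shape add_predicate leaves at each loop entry (a sketch reconstructed from the code above, not text from the patch); find_predicate_insertion_point and the Op_Opaque1 assert in loopnode.cpp match it as entry->in(0)->in(1)->in(1):

    ConI(1) --> Opaque1 --> Conv2B --> If --> IfTrue  --> (falls through toward the loop head)
                                         \--> IfFalse --> uncommon_trap(Reason_predicate,
                                                                        Action_maybe_recompile)

Until loop predication installs a real test, the If is effectively always taken on the IfTrue side; the Opaque1 exists only to keep IGVN from folding the constant condition away before the loop optimizer has had a chance to use it.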
@@ -881,7 +894,7 @@
 
 //-------------------------------repush_if_args--------------------------------
 // Push arguments of an "if" bytecode back onto the stack by adjusting _sp.
-inline void Parse::repush_if_args() {
+inline int Parse::repush_if_args() {
 #ifndef PRODUCT
   if (PrintOpto && WizardMode) {
     tty->print("defending against excessive implicit null exceptions on %s @%d in ",
@@ -895,6 +908,7 @@
   assert(argument(0) != NULL, "must exist");
   assert(bc_depth == 1 || argument(1) != NULL, "two must exist");
   _sp += bc_depth;
+  return bc_depth;
 }
 
 //----------------------------------do_ifnull----------------------------------
@@ -954,8 +968,14 @@
     // Update method data
     profile_taken_branch(target_bci);
     adjust_map_after_if(btest, c, prob, branch_block, next_block);
-    if (!stopped())
+    if (!stopped()) {
+      if (should_add_predicate(target_bci)) { // add a predicate if it branches to a loop
+        int nargs = repush_if_args(); // set original stack for uncommon_trap
+        add_predicate();
+        _sp -= nargs;
+      }
       merge(target_bci);
+    }
   }
 }
 
@@ -1076,8 +1096,14 @@
     // Update method data
     profile_taken_branch(target_bci);
     adjust_map_after_if(taken_btest, c, prob, branch_block, next_block);
-    if (!stopped())
+    if (!stopped()) {
+      if (should_add_predicate(target_bci)) { // add a predicate if it branches to a loop
+        int nargs = repush_if_args(); // set original stack for the uncommon_trap
+        add_predicate();
+        _sp -= nargs;
+      }
       merge(target_bci);
+    }
   }
 }
 
@@ -2080,6 +2106,10 @@
 
   // Update method data
   profile_taken_branch(target_bci);
+  // Add loop predicate if it goes to a loop
+  if (should_add_predicate(target_bci)) {
+    add_predicate();
+  }
   // Merge the current control into the target basic block
   merge(target_bci);
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/runtime.cpp
--- a/src/share/vm/opto/runtime.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/runtime.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -143,7 +143,7 @@
 
 // We failed the fast-path allocation.  Now we need to do a scavenge or GC
 // and try allocation again.
-void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
+void OptoRuntime::new_store_pre_barrier(JavaThread* thread) {
   // After any safepoint, just before going back to compiled code,
   // we inform the GC that we will be doing initializing writes to
   // this object in the future without emitting card-marks, so
@@ -156,7 +156,7 @@
   assert(Universe::heap()->can_elide_tlab_store_barriers(),
          "compiler must check this first");
   // GC may decide to give back a safer copy of new_obj.
-  new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
+  new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj);
   thread->set_vm_result(new_obj);
 }
 
@@ -200,7 +200,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
@@ -239,7 +239,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/runtime.hpp
--- a/src/share/vm/opto/runtime.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/runtime.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -133,8 +133,9 @@
   // Allocate storage for a objArray or typeArray
   static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
 
-  // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
-  static void maybe_defer_card_mark(JavaThread* thread);
+  // Post-slow-path-allocation, pre-initializing-stores step for
+  // implementing ReduceInitialCardMarks
+  static void new_store_pre_barrier(JavaThread* thread);
 
   // Allocate storage for a multi-dimensional arrays
   // Note: needs to be fixed for arbitrary number of dimensions
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/split_if.cpp
--- a/src/share/vm/opto/split_if.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/split_if.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -219,6 +219,7 @@
 
 //------------------------------register_new_node------------------------------
 void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) {
+  assert(!n->is_CFG(), "must be data node");
   _igvn.register_new_node_with_optimizer(n);
   set_ctrl(n, blk);
   IdealLoopTree *loop = get_loop(blk);
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/subnode.cpp
--- a/src/share/vm/opto/subnode.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/subnode.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1244,8 +1244,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dcos( d ) );
+  return TypeD::make( StubRoutines::intrinsic_cos( d ) );
 }
 
 //=============================================================================
@@ -1256,8 +1255,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dsin( d ) );
+  return TypeD::make( StubRoutines::intrinsic_sin( d ) );
 }
 
 //=============================================================================
@@ -1268,8 +1266,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dtan( d ) );
+  return TypeD::make( StubRoutines::intrinsic_tan( d ) );
 }
 
 //=============================================================================
@@ -1280,8 +1277,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dlog( d ) );
+  return TypeD::make( StubRoutines::intrinsic_log( d ) );
 }
 
 //=============================================================================
@@ -1292,8 +1288,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dlog10( d ) );
+  return TypeD::make( StubRoutines::intrinsic_log10( d ) );
 }
 
 //=============================================================================
@@ -1304,8 +1299,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dexp( d ) );
+  return TypeD::make( StubRoutines::intrinsic_exp( d ) );
 }
 
@@ -1323,5 +1317,5 @@
   double d2 = t2->getd();
   if( d1 < 0.0 ) return Type::DOUBLE;
   if( d2 < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dpow( d1, d2 ) );
+  return TypeD::make( StubRoutines::intrinsic_pow( d1, d2 ) );
 }
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/compilationPolicy.cpp
--- a/src/share/vm/runtime/compilationPolicy.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/compilationPolicy.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 2000-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2000-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,6 +74,16 @@
   if (m->is_abstract()) return false;
   if (DontCompileHugeMethods && m->code_size() > HugeMethodLimit) return false;
 
+  // Math intrinsics should never be compiled, as this can lead to
+  // monotonicity problems because the interpreter will prefer the
+  // compiled code to the intrinsic version.  This can't happen in
+  // production because the invocation counter can't be incremented,
+  // but we shouldn't expose the system to this problem in testing
+  // modes.
+  if (!AbstractInterpreter::can_be_compiled(m)) {
+    return false;
+  }
+
   return !m->is_not_compilable();
 }
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/deoptimization.cpp
--- a/src/share/vm/runtime/deoptimization.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/deoptimization.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1672,7 +1672,8 @@
   "unhandled",
   "constraint",
   "div0_check",
-  "age"
+  "age",
+  "predicate"
 };
 const char* Deoptimization::_trap_action_name[Action_LIMIT] = {
   // Note:  Keep this in sync. with enum DeoptAction.
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/deoptimization.hpp
--- a/src/share/vm/runtime/deoptimization.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/deoptimization.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -46,6 +46,7 @@
     Reason_constraint,            // arbitrary runtime constraint violated
     Reason_div0_check,            // a null_check due to division by zero
     Reason_age,                   // nmethod too old; tier threshold reached
+    Reason_predicate,             // compiler generated predicate failed
 
     Reason_LIMIT,
     // Note:  Keep this enum in sync. with _trap_reason_name.
    Reason_RECORDED_LIMIT = Reason_unloaded   // some are not recorded per bc
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/globals.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -2012,6 +2012,10 @@
   diagnostic(bool, GCParallelVerificationEnabled, true,                     \
           "Enable parallel memory system verification")                    \
                                                                             \
+  diagnostic(bool, DeferInitialCardMark, false,                             \
+          "When +ReduceInitialCardMarks, explicitly defer any that "       \
+          "may arise from new_store_pre_barrier")                          \
+                                                                            \
   diagnostic(bool, VerifyRememberedSets, false,                             \
           "Verify GC remembered sets")                                     \
                                                                             \
@@ -3456,6 +3460,9 @@
   diagnostic(bool, OptimizeMethodHandles, true,                             \
           "when constructing method handles, try to improve them")         \
                                                                             \
+  experimental(bool, TrustFinalNonStaticFields, false,                      \
+          "trust final non-static declarations for constant folding")      \
+                                                                            \
   experimental(bool, EnableInvokeDynamic, false,                            \
           "recognize the invokedynamic instruction")                       \
                                                                             \
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/stubRoutines.cpp
--- a/src/share/vm/runtime/stubRoutines.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/stubRoutines.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -97,6 +97,14 @@
 address StubRoutines::_unsafe_arraycopy               = NULL;
 address StubRoutines::_generic_arraycopy              = NULL;
 
+double (* StubRoutines::_intrinsic_log   )(double) = NULL;
+double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
+double (* StubRoutines::_intrinsic_exp   )(double) = NULL;
+double (* StubRoutines::_intrinsic_pow   )(double, double) = NULL;
+double (* StubRoutines::_intrinsic_sin   )(double) = NULL;
+double (* StubRoutines::_intrinsic_cos   )(double) = NULL;
+double (* StubRoutines::_intrinsic_tan   )(double) = NULL;
+
 // Initialization
 //
 // Note: to break cycle with universe initialization, stubs are generated in two phases.
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/stubRoutines.hpp
--- a/src/share/vm/runtime/stubRoutines.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/stubRoutines.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -148,6 +148,20 @@
   static address _unsafe_arraycopy;
   static address _generic_arraycopy;
 
+  // These are versions of the java.lang.Math methods which perform
+  // the same operations as the intrinsic version.  They are used for
+  // constant folding in the compiler to ensure equivalence.  If the
+  // intrinsic version returns the same result as the strict version
+  // then they can be set to the appropriate function from
+  // SharedRuntime.
+  static double (*_intrinsic_log)(double);
+  static double (*_intrinsic_log10)(double);
+  static double (*_intrinsic_exp)(double);
+  static double (*_intrinsic_pow)(double, double);
+  static double (*_intrinsic_sin)(double);
+  static double (*_intrinsic_cos)(double);
+  static double (*_intrinsic_tan)(double);
+
  public:
   // Initialization/Testing
   static void    initialize1();                            // must happen before universe::genesis
@@ -245,6 +259,35 @@
   static address unsafe_arraycopy()        { return _unsafe_arraycopy; }
   static address generic_arraycopy()       { return _generic_arraycopy; }
 
+  static double  intrinsic_log(double d) {
+    assert(_intrinsic_log != NULL, "must be defined");
+    return _intrinsic_log(d);
+  }
+  static double  intrinsic_log10(double d) {
+    assert(_intrinsic_log10 != NULL, "must be defined");
+    return _intrinsic_log10(d);
+  }
+  static double  intrinsic_exp(double d) {
+    assert(_intrinsic_exp != NULL, "must be defined");
+    return _intrinsic_exp(d);
+  }
+  static double  intrinsic_pow(double d, double d2) {
+    assert(_intrinsic_pow != NULL, "must be defined");
+    return _intrinsic_pow(d, d2);
+  }
+  static double  intrinsic_sin(double d) {
+    assert(_intrinsic_sin != NULL, "must be defined");
+    return _intrinsic_sin(d);
+  }
+  static double  intrinsic_cos(double d) {
+    assert(_intrinsic_cos != NULL, "must be defined");
+    return _intrinsic_cos(d);
+  }
+  static double  intrinsic_tan(double d) {
+    assert(_intrinsic_tan != NULL, "must be defined");
+    return _intrinsic_tan(d);
+  }
+
   //
   // Default versions of the above arraycopy functions for platforms which do
   // not have specialized versions
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/thread.cpp
--- a/src/share/vm/runtime/thread.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/thread.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -2357,9 +2357,8 @@
 };
 
 void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
-  // Flush deferred store-barriers, if any, associated with
-  // initializing stores done by this JavaThread in the current epoch.
-  Universe::heap()->flush_deferred_store_barrier(this);
+  // Verify that the deferred card marks have been flushed.
+  assert(deferred_card_mark().is_empty(), "Should be empty during GC");
 
   // The ThreadProfiler oops_do is done from FlatProfiler::oops_do
   // since there may be more than one thread using each ThreadProfiler.
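To make the consistency requirement behind the StubRoutines math entries above concrete, here is a hypothetical Java illustration (not a test from this patch; the class name is invented): if C2 folded a constant Math.sin with one routine while the generated code called a different intrinsic, the two lines below could print different values for the same mathematical input.

    // Hypothetical sketch: both prints must agree, whether the call is
    // constant-folded by SinDNode::Value or executed as an intrinsic.
    public class FoldVsIntrinsic {
        static final double C = 0.5;                        // constant, may be folded by C2
        static double f(double x) { return Math.sin(x); }   // computed by the intrinsic
        public static void main(String[] args) {
            System.out.println(Math.sin(C));
            System.out.println(f(C));
        }
    }

Routing constant folding through the same StubRoutines entries the generated code uses (and refusing to compile the interpreter's special math entries, per the compilationPolicy.cpp change above) keeps the two paths in agreement.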
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/vmStructs.cpp
--- a/src/share/vm/runtime/vmStructs.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/vmStructs.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -309,6 +309,7 @@
   nonstatic_field(CollectedHeap,               _reserved,                   MemRegion)    \
   nonstatic_field(SharedHeap,                  _perm_gen,                   PermGen*)     \
   nonstatic_field(CollectedHeap,               _barrier_set,                BarrierSet*)  \
+  nonstatic_field(CollectedHeap,               _defer_initial_card_mark,    bool)         \
   nonstatic_field(CollectedHeap,               _is_gc_active,               bool)         \
   nonstatic_field(CompactibleSpace,            _compaction_top,             HeapWord*)    \
   nonstatic_field(CompactibleSpace,            _first_dead,                 HeapWord*)    \
diff -r 3908ad124838 -r 2718ec34c699 test/compiler/6877254/Test.java
--- a/test/compiler/6877254/Test.java	Wed Jan 20 11:32:41 2010 -0700
+++ b/test/compiler/6877254/Test.java	Fri Jan 22 15:06:53 2010 -0800
@@ -26,7 +26,7 @@
  * @bug 6877254
  * @summary Implement StoreCMNode::Ideal to promote its OopStore above the MergeMem
  *
- * @run main/othervm -server -Xcomp -XX:+UseConcMarkSweepGC Test
+ * @run main/othervm -Xcomp Test
 */

public class Test {
diff -r 3908ad124838 -r 2718ec34c699 test/compiler/6895383/Test.java
--- a/test/compiler/6895383/Test.java	Wed Jan 20 11:32:41 2010 -0700
+++ b/test/compiler/6895383/Test.java	Fri Jan 22 15:06:53 2010 -0800
@@ -30,6 +30,9 @@
 * @run main/othervm -Xcomp Test
 */

+import java.util.*;
+import java.util.concurrent.*;
+
public class Test {
    public static void main(String argv[]) {
        Test test = new Test();
diff -r 3908ad124838 -r 2718ec34c699 test/compiler/6896727/Test.java
--- a/test/compiler/6896727/Test.java	Wed Jan 20 11:32:41 2010 -0700
+++ b/test/compiler/6896727/Test.java	Fri Jan 22 15:06:53 2010 -0800
@@ -26,7 +26,7 @@
 * @test
 * @bug 6896727
 * @summary nsk/logging/LoggingPermission/LoggingPermission/logperm002 fails with G1, EscapeAnalisys w/o COOPs
- * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:+DoEscapeAnalysis -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC Test
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:+DoEscapeAnalysis Test
 */

public class Test {
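Taken together, the parser and loop-optimizer changes implement the hoisting sketched below (a hypothetical Java example, not a test from this patch):

    // Before predication: the implicit null check and range check for a[i]
    // are evaluated on every iteration.
    public class PredicationExample {
        static int sum(int[] a, int n) {
            int s = 0;
            for (int i = 0; i < n; i++) {
                s += a[i];  // checks become predicates above the loop
            }
            return s;
        }
        public static void main(String[] args) {
            System.out.println(sum(new int[100], 100));
        }
    }

With scale = 1, offset = 0 and stride = 1, rc_predicate produces roughly "n-1 u< a.length", which together with the hoisted null check guards loop entry; if either predicate fails at run time, execution deoptimizes through the new Reason_predicate uncommon trap and the method can be recompiled without the offending predicate.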