changeset 4988:eb5b24d1499f

Merge
author Christian Haeubl <christian.haeubl@oracle.com>
date Mon, 27 Feb 2012 15:06:36 -0800
parents f292f9c590ba (current diff) 0d2a2797a61f (diff)
children 09f57983f4bc
files src/os/bsd/vm/decoder_bsd.cpp
diffstat 451 files changed, 18921 insertions(+), 10829 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Mon Feb 27 14:50:58 2012 -0800
+++ b/.hgtags	Mon Feb 27 15:06:36 2012 -0800
@@ -205,3 +205,24 @@
 d1f29d4e0bc60e8bd7ae961f1306d8ab33290212 jdk8-b16
 6de8c9ba5907e4c5ca05ac4b8d84a8e2cbd92399 hs23-b07
 a2fef924d8e6f37dac2a887315e3502876cc8e24 hs23-b08
+61165f53f1656b9f99e4fb806429bf98b99d59c3 jdk8-b18
+4bcf61041217f8677dcec18e90e9196acc945bba hs23-b09
+9232e0ecbc2cec54dcc8f93004fb00c214446460 jdk8-b19
+fe2c8764998112b7fefcd7d41599714813ae4327 jdk8-b20
+9952d1c439d64c5fd4ad1236a63a62bd5a49d4c3 jdk8-b21
+513351373923f74a7c91755748b95c9771e59f96 hs23-b10
+24727fb37561779077fdfa5a33342246f20e5c0f jdk8-b22
+dcc292399a39113957eebbd3e487b7e05e2c79fc hs23-b11
+e850d8e7ea54b91c7aa656e297f0f9f38dd4c296 jdk8-b23
+9e177d44b10fe92ecffa965fef9c5ac5433c1b46 hs23-b12
+a80fd4f45d7aaa154ed2f86a129f3c9c4035ec7a jdk8-b24
+b22de824749922986ce4d442bed029916b832807 hs23-b13
+64b46f975ab82948c1e021e17775ff4fab8bc40e hs23-b14
+9ad8feb5afbddec46d3cfe29fb5f73c2e99d5a43 jdk8-b25
+d71e662fe03741b6de498ca2077220148405a978 hs23-b15
+fd3060701216a11c0df6dcd053c6fd7c2b17a42c jdk8-b26
+f92a171cf0071ca6c3fa8231d7d570377f8b2f4d hs23-b16
+f92a171cf0071ca6c3fa8231d7d570377f8b2f4d hs23-b16
+931e5f39e365a0d550d79148ff87a7f9e864d2e1 hs23-b16
+3b24e7e01d20ca590d0f86b1222bb7c3f1a2aa2d jdk8-b27
+975c4105f1e2ef1190a75b77124033f1fd4290b5 hs24-b01
--- a/agent/src/os/linux/Makefile	Mon Feb 27 14:50:58 2012 -0800
+++ b/agent/src/os/linux/Makefile	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,7 @@
 
 LIBS     = -lthread_db
 
-CFLAGS   = -c -fPIC -g -D_GNU_SOURCE -D$(ARCH) $(INCLUDES)
+CFLAGS   = -c -fPIC -g -D_GNU_SOURCE -D$(ARCH) $(INCLUDES) -D_FILE_OFFSET_BITS=64
 
 LIBSA = $(ARCH)/libsaproc.so
 
--- a/agent/src/os/linux/libproc_impl.c	Mon Feb 27 14:50:58 2012 -0800
+++ b/agent/src/os/linux/libproc_impl.c	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,10 +50,6 @@
    char alt_path[PATH_MAX + 1];
 
    init_alt_root();
-   fd = open(name, O_RDONLY);
-   if (fd >= 0) {
-      return fd;
-   }
 
    if (alt_root_len > 0) {
       strcpy(alt_path, alt_root);
@@ -73,6 +69,11 @@
             return fd;
          }
       }
+   } else {
+      fd = open(name, O_RDONLY);
+      if (fd >= 0) {
+         return fd;
+      }
    }
 
    return -1;
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Mon Feb 27 14:50:58 2012 -0800
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,8 +49,12 @@
     static private long g1CommittedFieldOffset;
     // size_t _summary_bytes_used;
     static private CIntegerField summaryBytesUsedField;
-    // G1MonitoringSupport* _g1mm
+    // G1MonitoringSupport* _g1mm;
     static private AddressField g1mmField;
+    // MasterOldRegionSet _old_set;
+    static private long oldSetFieldOffset;
+    // MasterHumongousRegionSet _humongous_set;
+    static private long humongousSetFieldOffset;
 
     static {
         VM.registerVMInitializedObserver(new Observer() {
@@ -67,12 +71,14 @@
         g1CommittedFieldOffset = type.getField("_g1_committed").getOffset();
         summaryBytesUsedField = type.getCIntegerField("_summary_bytes_used");
         g1mmField = type.getAddressField("_g1mm");
+        oldSetFieldOffset = type.getField("_old_set").getOffset();
+        humongousSetFieldOffset = type.getField("_humongous_set").getOffset();
     }
 
     public long capacity() {
         Address g1CommittedAddr = addr.addOffsetTo(g1CommittedFieldOffset);
-        MemRegion g1_committed = new MemRegion(g1CommittedAddr);
-        return g1_committed.byteSize();
+        MemRegion g1Committed = new MemRegion(g1CommittedAddr);
+        return g1Committed.byteSize();
     }
 
     public long used() {
@@ -94,6 +100,18 @@
         return (G1MonitoringSupport) VMObjectFactory.newObject(G1MonitoringSupport.class, g1mmAddr);
     }
 
+    public HeapRegionSetBase oldSet() {
+        Address oldSetAddr = addr.addOffsetTo(oldSetFieldOffset);
+        return (HeapRegionSetBase) VMObjectFactory.newObject(HeapRegionSetBase.class,
+                                                             oldSetAddr);
+    }
+
+    public HeapRegionSetBase humongousSet() {
+        Address humongousSetAddr = addr.addOffsetTo(humongousSetFieldOffset);
+        return (HeapRegionSetBase) VMObjectFactory.newObject(HeapRegionSetBase.class,
+                                                             humongousSetAddr);
+    }
+
     private Iterator<HeapRegion> heapRegionIterator() {
         return hrs().heapRegionIterator();
     }
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1MonitoringSupport.java	Mon Feb 27 14:50:58 2012 -0800
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1MonitoringSupport.java	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -77,6 +77,10 @@
         return edenUsedField.getValue(addr);
     }
 
+    public long edenRegionNum() {
+        return edenUsed() / HeapRegion.grainBytes();
+    }
+
     public long survivorCommitted() {
         return survivorCommittedField.getValue(addr);
     }
@@ -85,6 +89,10 @@
         return survivorUsedField.getValue(addr);
     }
 
+    public long survivorRegionNum() {
+        return survivorUsed() / HeapRegion.grainBytes();
+    }
+
     public long oldCommitted() {
         return oldCommittedField.getValue(addr);
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.gc_implementation.g1;
+
+import java.util.Iterator;
+import java.util.Observable;
+import java.util.Observer;
+
+import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.runtime.VMObject;
+import sun.jvm.hotspot.runtime.VMObjectFactory;
+import sun.jvm.hotspot.types.AddressField;
+import sun.jvm.hotspot.types.CIntegerField;
+import sun.jvm.hotspot.types.Type;
+import sun.jvm.hotspot.types.TypeDataBase;
+
+// Mirror class for HeapRegionSetBase. Represents a group of regions.
+
+public class HeapRegionSetBase extends VMObject {
+    // size_t _length;
+    static private CIntegerField lengthField;
+    // size_t _region_num;
+    static private CIntegerField regionNumField;
+    // size_t _total_used_bytes;
+    static private CIntegerField totalUsedBytesField;
+
+    static {
+        VM.registerVMInitializedObserver(new Observer() {
+                public void update(Observable o, Object data) {
+                    initialize(VM.getVM().getTypeDataBase());
+                }
+            });
+    }
+
+    static private synchronized void initialize(TypeDataBase db) {
+        Type type = db.lookupType("HeapRegionSetBase");
+
+        lengthField         = type.getCIntegerField("_length");
+        regionNumField      = type.getCIntegerField("_region_num");
+        totalUsedBytesField = type.getCIntegerField("_total_used_bytes");
+    }
+
+    public long length() {
+        return lengthField.getValue(addr);
+    }
+
+    public long regionNum() {
+        return regionNumField.getValue(addr);
+    }
+
+    public long totalUsedBytes() {
+        return totalUsedBytesField.getValue(addr);
+    }
+
+    public HeapRegionSetBase(Address addr) {
+        super(addr);
+    }
+}
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/LoaderConstraintTable.java	Mon Feb 27 14:50:58 2012 -0800
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/LoaderConstraintTable.java	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,15 +42,6 @@
 
   private static synchronized void initialize(TypeDataBase db) {
     Type type = db.lookupType("LoaderConstraintTable");
-    nofBuckets = db.lookupIntConstant("LoaderConstraintTable::_nof_buckets").intValue();
-  }
-
-  // Fields
-  private static int nofBuckets;
-
-  // Accessors
-  public static int getNumOfBuckets() {
-    return nofBuckets;
   }
 
   public LoaderConstraintTable(Address addr) {
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/SystemDictionary.java	Mon Feb 27 14:50:58 2012 -0800
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/SystemDictionary.java	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,7 +36,6 @@
   private static AddressField placeholdersField;
   private static AddressField loaderConstraintTableField;
   private static sun.jvm.hotspot.types.OopField javaSystemLoaderField;
-  private static int nofBuckets;
 
   private static sun.jvm.hotspot.types.OopField objectKlassField;
   private static sun.jvm.hotspot.types.OopField classLoaderKlassField;
@@ -62,7 +61,6 @@
     placeholdersField = type.getAddressField("_placeholders");
     loaderConstraintTableField = type.getAddressField("_loader_constraints");
     javaSystemLoaderField = type.getOopField("_java_system_loader");
-    nofBuckets = db.lookupIntConstant("SystemDictionary::_nof_buckets").intValue();
 
     objectKlassField = type.getOopField(WK_KLASS("Object_klass"));
     classLoaderKlassField = type.getOopField(WK_KLASS("ClassLoader_klass"));
@@ -142,10 +140,6 @@
     return newOop(javaSystemLoaderField.getValue());
   }
 
-  public static int getNumOfBuckets() {
-    return nofBuckets;
-  }
-
   private static Oop newOop(OopHandle handle) {
     return VM.getVM().getObjectHeap().newOop(handle);
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Mon Feb 27 14:50:58 2012 -0800
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Mon Feb 27 15:06:36 2012 -0800
@@ -648,7 +648,12 @@
   }
 
   public void printValueOn(PrintStream tty) {
-    tty.print("ConstantPool for " + getPoolHolder().getName().asString());
+    Oop holder = poolHolder.getValue(this);
+    if (holder instanceof Klass) {
+      tty.print("ConstantPool for " + ((Klass)holder).getName().asString());
+    } else {
+      tty.print("ConstantPool for partially loaded class");
+    }
   }
 
   public long getObjectSize() {
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Mon Feb 27 14:50:58 2012 -0800
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,6 +67,7 @@
       printValue("SurvivorRatio    = ", getFlagValue("SurvivorRatio", flagMap));
       printValMB("PermSize         = ", getFlagValue("PermSize", flagMap));
       printValMB("MaxPermSize      = ", getFlagValue("MaxPermSize", flagMap));
+      printValMB("G1HeapRegionSize = ", HeapRegion.grainBytes());
 
       System.out.println();
       System.out.println("Heap Usage:");
@@ -100,11 +101,20 @@
          } else if (sharedHeap instanceof G1CollectedHeap) {
              G1CollectedHeap g1h = (G1CollectedHeap) sharedHeap;
              G1MonitoringSupport g1mm = g1h.g1mm();
-             System.out.println("G1 Young Generation");
-             printG1Space("Eden Space:", g1mm.edenUsed(), g1mm.edenCommitted());
-             printG1Space("From Space:", g1mm.survivorUsed(), g1mm.survivorCommitted());
-             printG1Space("To Space:", 0, 0);
-             printG1Space("G1 Old Generation", g1mm.oldUsed(), g1mm.oldCommitted());
+             long edenRegionNum = g1mm.edenRegionNum();
+             long survivorRegionNum = g1mm.survivorRegionNum();
+             HeapRegionSetBase oldSet = g1h.oldSet();
+             HeapRegionSetBase humongousSet = g1h.humongousSet();
+             long oldRegionNum = oldSet.regionNum() + humongousSet.regionNum();
+             printG1Space("G1 Heap:", g1h.n_regions(),
+                          g1h.used(), g1h.capacity());
+             System.out.println("G1 Young Generation:");
+             printG1Space("Eden Space:", edenRegionNum,
+                          g1mm.edenUsed(), g1mm.edenCommitted());
+             printG1Space("Survivor Space:", survivorRegionNum,
+                          g1mm.survivorUsed(), g1mm.survivorCommitted());
+             printG1Space("G1 Old Generation:", oldRegionNum,
+                          g1mm.oldUsed(), g1mm.oldCommitted());
          } else {
              throw new RuntimeException("unknown SharedHeap type : " + heap.getClass());
          }
@@ -216,9 +226,11 @@
       System.out.println(alignment +  (double)space.used() * 100.0 / space.capacity() + "% used");
    }
 
-   private void printG1Space(String spaceName, long used, long capacity) {
+   private void printG1Space(String spaceName, long regionNum,
+                             long used, long capacity) {
       long free = capacity - used;
       System.out.println(spaceName);
+      printValue("regions  = ", regionNum);
       printValMB("capacity = ", capacity);
       printValMB("used     = ", used);
       printValMB("free     = ", free);
--- a/make/Makefile	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/Makefile	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -90,19 +90,31 @@
 SHARK_VM_TARGETS=productshark fastdebugshark optimizedshark jvmgshark
 GRAAL_VM_TARGETS=productgraal fastdebuggraal optimizedgraal jvmggraal
 
+COMMON_VM_PRODUCT_TARGETS=product product1 productkernel docs export_product
+COMMON_VM_FASTDEBUG_TARGETS=fastdebug fastdebug1 fastdebugkernel docs export_fastdebug
+COMMON_VM_DEBUG_TARGETS=jvmg jvmg1 jvmgkernel docs export_debug
+
 # JDK directory list
 JDK_DIRS=bin include jre lib demo
 
 all:           all_product all_fastdebug
-ifndef BUILD_CLIENT_ONLY
-all_product:   product product1 productkernel docs export_product
-all_fastdebug: fastdebug fastdebug1 fastdebugkernel docs export_fastdebug
-all_debug:     jvmg jvmg1 jvmgkernel docs export_debug
-else
+
+ifdef BUILD_CLIENT_ONLY
 all_product:   product1 docs export_product
 all_fastdebug: fastdebug1 docs export_fastdebug
 all_debug:     jvmg1 docs export_debug
+else
+ifeq ($(MACOSX_UNIVERSAL),true)
+all_product:   universal_product
+all_fastdebug: universal_fastdebug
+all_debug:     universal_debug
+else
+all_product:   $(COMMON_VM_PRODUCT_TARGETS)
+all_fastdebug: $(COMMON_VM_FASTDEBUG_TARGETS)
+all_debug:     $(COMMON_VM_DEBUG_TARGETS)
 endif
+endif
+
 all_optimized: optimized optimized1 optimizedkernel docs export_optimized
 
 allzero:           all_productzero all_fastdebugzero
@@ -245,20 +257,19 @@
 	$(MAKE) VM_SUBDIR=${VM_DEBUG} EXPORT_SUBDIR=/debug   generic_export
 export_optimized:
 	$(MAKE) VM_SUBDIR=optimized EXPORT_SUBDIR=/optimized generic_export
-export_product_jdk:
+export_product_jdk::
 	$(MAKE) ALT_EXPORT_PATH=$(JDK_IMAGE_DIR) \
 		VM_SUBDIR=product                            generic_export
-export_optimized_jdk:
+export_optimized_jdk::
 	$(MAKE) ALT_EXPORT_PATH=$(JDK_IMAGE_DIR) \
 		VM_SUBDIR=optimized                          generic_export
-export_fastdebug_jdk:
+export_fastdebug_jdk::
 	$(MAKE) ALT_EXPORT_PATH=$(JDK_IMAGE_DIR)/fastdebug \
 		VM_SUBDIR=fastdebug                          generic_export
-export_debug_jdk:
+export_debug_jdk::
 	$(MAKE) ALT_EXPORT_PATH=$(JDK_IMAGE_DIR)/debug \
 		VM_SUBDIR=${VM_DEBUG}                        generic_export
 
-
 # Export file copy rules
 XUSAGE=$(HS_SRC_DIR)/share/vm/Xusage.txt
 DOCS_DIR=$(OUTPUTDIR)/$(VM_PLATFORM)_docs
@@ -382,7 +393,7 @@
 $(EXPORT_LIB_DIR)/%.jar: $(GEN_DIR)/%.jar
 	$(install-file)
 
-# Include files (jvmti.h, jvmticmlr.h, jni.h, $(JDK_INCLUDE_SUBDIR)/jni_md.h, jmm.h)
+# Include files (jvmti.h, jvmticmlr.h, jni.h, $(JDK_INCLUDE_SUBDIR)/jni_md.h, jmm.h, jfr.h)
 $(EXPORT_INCLUDE_DIR)/%: $(GEN_DIR)/jvmtifiles/%
 	$(install-file)
 
@@ -399,6 +410,15 @@
 $(EXPORT_INCLUDE_DIR)/%: $(HS_SRC_DIR)/share/vm/services/%
 	$(install-file)
 
+JFR_EXISTS=$(shell if [ -d "$(HS_ALT_SRC)" ]; then echo 1; else echo 0; fi)
+# export jfr.h: install from $(HS_ALT_SRC) when that directory exists, else use an empty rule
+ifeq ($(JFR_EXISTS),1)
+$(EXPORT_INCLUDE_DIR)/%: $(HS_ALT_SRC)/share/vm/jfr/agent/%
+	$(install-file)
+else
+$(EXPORT_INCLUDE_DIR)/jfr.h:
+endif
+
 # Doc files (jvmti.html)
 $(EXPORT_DOCS_DIR)/platform/jvmti/%: $(DOCS_DIR)/%
 	$(install-file)
@@ -443,21 +463,27 @@
 	 ($(CD) $(JDK_IMAGE_DIR) && $(TAR) -xf -)
 
 test_jdk:
-  ifneq ($(ZERO_BUILD), true)
     ifeq ($(ARCH_DATA_MODEL), 32)
-	$(JDK_IMAGE_DIR)/bin/java -client -version
+      ifneq ($(ZERO_BUILD), true)
+	$(JDK_IMAGE_DIR)/bin/java -d32 -client -Xinternalversion
+	$(JDK_IMAGE_DIR)/bin/java -d32 -client -version
+      endif
+	$(JDK_IMAGE_DIR)/bin/java -d32 -server -Xinternalversion
+	$(JDK_IMAGE_DIR)/bin/java -d32 -server -version
     endif
-  endif
-	$(JDK_IMAGE_DIR)/bin/java -server -version
+    ifeq ($(ARCH_DATA_MODEL), 64)
+	$(JDK_IMAGE_DIR)/bin/java -d64 -server -Xinternalversion
+	$(JDK_IMAGE_DIR)/bin/java -d64 -server -version
+    endif
 
-copy_product_jdk:
+copy_product_jdk::
 	$(RM) -r $(JDK_IMAGE_DIR)
 	$(MKDIR) -p $(JDK_IMAGE_DIR)
 	($(CD) $(JDK_IMPORT_PATH) && \
 	 $(TAR) -cf - $(JDK_DIRS)) | \
 	 ($(CD) $(JDK_IMAGE_DIR) && $(TAR) -xf -)
 
-copy_fastdebug_jdk:
+copy_fastdebug_jdk::
 	$(RM) -r $(JDK_IMAGE_DIR)/fastdebug
 	$(MKDIR) -p $(JDK_IMAGE_DIR)/fastdebug
 	if [ -d $(JDK_IMPORT_PATH)/fastdebug ] ; then \
@@ -470,7 +496,7 @@
 	   ($(CD) $(JDK_IMAGE_DIR)/fastdebug && $(TAR) -xf -) ; \
 	fi
 
-copy_debug_jdk:
+copy_debug_jdk::
 	$(RM) -r $(JDK_IMAGE_DIR)/debug
 	$(MKDIR) -p $(JDK_IMAGE_DIR)/debug
 	if [ -d $(JDK_IMPORT_PATH)/debug ] ; then \
@@ -487,36 +513,6 @@
 	   ($(CD) $(JDK_IMAGE_DIR)/debug && $(TAR) -xf -) ; \
 	fi
 
-# macosx universal builds
-
-ifeq ($(MACOSX_UNIVERSAL), true)
-$(UNIVERSAL_LIPO_LIST):
-	lipo -create -output $@ $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@)
-
-$(UNIVERSAL_COPY_LIST):
-	$(CP) $(EXPORT_JRE_LIB_DIR)/i386/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) $@
-
-universalize: $(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
-endif
-
-universal_product:
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 MACOSX_UNIVERSAL=true all_product
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 MACOSX_UNIVERSAL=true all_product
-	$(MKDIR) -p $(EXPORT_JRE_LIB_DIR)/{client,server}
-	$(QUIETLY) $(MAKE) MACOSX_UNIVERSAL=true universalize
-
-universal_fastdebug:
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 MACOSX_UNIVERSAL=true all_fastdebug
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 MACOSX_UNIVERSAL=true all_fastdebug
-	$(MKDIR) -p $(EXPORT_JRE_LIB_DIR)/{client,server}
-	$(QUIETLY) $(MAKE) MACOSX_UNIVERSAL=true universalize
-
-universal_debug:
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 MACOSX_UNIVERSAL=true all_debug
-	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 MACOSX_UNIVERSAL=true all_debug
-	$(MKDIR) -p $(EXPORT_JRE_LIB_DIR)/{client,server}
-	$(QUIETLY) $(MAKE) MACOSX_UNIVERSAL=true universalize
-
 #
 # Check target
 #
@@ -570,6 +566,7 @@
 OUTPUTDIR.desc             = Output directory, default is build/<osname>
 BOOTDIR.desc               = JDK used to compile agent java source and test with
 JDK_IMPORT_PATH.desc       = Promoted JDK to copy for 'create_jdk'
+JDK_IMAGE_DIR.desc         = Directory to place JDK to copy
 EXPORT_PATH.desc           = Directory to place files to export for JDK build
 
 # Make variables to print out (description and value)
@@ -578,6 +575,7 @@
     OUTPUTDIR                   \
     BOOTDIR                     \
     JDK_IMPORT_PATH             \
+    JDK_IMAGE_DIR               \
     EXPORT_PATH
 
 # Make variables that should refer to directories that exist
@@ -636,6 +634,13 @@
 	@$(ECHO) \
 "  $(MAKE) ALT_JDK_IMPORT_PATH=/opt/java/jdk$(JDK_VERSION)"
 
+# Universal build support
+ifeq ($(OS_VENDOR), Darwin)
+ifeq ($(MACOSX_UNIVERSAL),true)
+include $(GAMMADIR)/make/$(OSNAME)/makefiles/universal.gmk
+endif
+endif
+
 # JPRT rule to build this workspace
 include $(GAMMADIR)/make/jprt.gmk
 
@@ -645,6 +650,4 @@
 	export_product export_fastdebug export_debug export_optimized \
 	export_jdk_product export_jdk_fastdebug export_jdk_debug \
 	create_jdk copy_jdk update_jdk test_jdk \
-	copy_product_jdk copy_fastdebug_jdk copy_debug_jdk universalize \
-	universal_product
-
+	copy_product_jdk copy_fastdebug_jdk copy_debug_jdk 
--- a/make/bsd/Makefile	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/Makefile	Mon Feb 27 15:06:36 2012 -0800
@@ -210,7 +210,7 @@
 TARGETS_GRAAL     = $(addsuffix graal,$(TARGETS))
 
 BUILDTREE_MAKE    = $(GAMMADIR)/make/$(OSNAME)/makefiles/buildtree.make
-BUILDTREE_VARS    = GAMMADIR=$(GAMMADIR) OS_FAMILY=$(OSNAME) SRCARCH=$(SRCARCH) BUILDARCH=$(BUILDARCH) LIBARCH=$(LIBARCH)
+BUILDTREE_VARS    = GAMMADIR=$(GAMMADIR) OS_FAMILY=$(OSNAME) SRCARCH=$(SRCARCH) BUILDARCH=$(BUILDARCH) LIBARCH=$(LIBARCH) LIBRARY_SUFFIX=$(LIBRARY_SUFFIX)
 BUILDTREE_VARS   += HOTSPOT_RELEASE_VERSION=$(HOTSPOT_RELEASE_VERSION) HOTSPOT_BUILD_VERSION=$(HOTSPOT_BUILD_VERSION) JRE_RELEASE_VERSION=$(JRE_RELEASE_VERSION)
 
 BUILDTREE         = $(MAKE) -f $(BUILDTREE_MAKE) $(BUILDTREE_VARS)
--- a/make/bsd/makefiles/adlc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/adlc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -39,9 +39,16 @@
 
 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 
 
-SOURCES.AD = \
+ifeq ("${Platform_arch_model}", "${Platform_arch}")
+  SOURCES.AD = \
   $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
   $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+else
+  SOURCES.AD = \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+endif
 
 EXEC	= $(OUTDIR)/adlc
 
@@ -54,10 +61,10 @@
 INCLUDES += $(Src_Dirs_I:%=-I%)
 
 # set flags for adlc compilation
-CPPFLAGS = $(SYSDEFS) $(INCLUDES)
+CXXFLAGS = $(SYSDEFS) $(INCLUDES)
 
 # Force assertions on.
-CPPFLAGS += -DASSERT
+CXXFLAGS += -DASSERT
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 # Compiler warnings are treated as errors
@@ -104,7 +111,7 @@
 
 $(EXEC) : $(OBJECTS)
 	@echo Making adlc
-	$(QUIETLY) $(HOST.LINK_NOPROF.CC) -o $(EXEC) $(OBJECTS)
+	$(QUIETLY) $(HOST.LINK_NOPROF.CXX) -o $(EXEC) $(OBJECTS)
 
 # Random dependencies:
 $(OBJECTS): opcodes.hpp classes.hpp adlc.hpp adlcVMDeps.hpp adlparse.hpp archDesc.hpp arena.hpp dict2.hpp filebuff.hpp forms.hpp formsopt.hpp formssel.hpp
@@ -206,14 +213,14 @@
 $(OUTDIR)/%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(HOST.COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # Some object files are given a prefix, to disambiguate
 # them from objects of the same name built for the VM.
 $(OUTDIR)/adlc-%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(HOST.COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # #########################################################################
 
--- a/make/bsd/makefiles/buildtree.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/buildtree.make	Mon Feb 27 15:06:36 2012 -0800
@@ -162,20 +162,6 @@
   endif
 endif
 
-ifeq ($(OS_VENDOR), Darwin)
-  # MACOSX FIXME: we should be able to run test_gamma (see MACOSX_PORT-214)
-  ifeq ($(ALWAYS_PASS_TEST_GAMMA),)
-    # ALWAYS_PASS_TEST_GAMMA wasn't set so we default to true on MacOS X
-    # until MACOSX_PORT-214 is fixed
-    ALWAYS_PASS_TEST_GAMMA=true
-  endif
-endif
-ifeq ($(ALWAYS_PASS_TEST_GAMMA), true)
-  TEST_GAMMA_STATUS= echo 'exit 0';
-else
-  TEST_GAMMA_STATUS=
-endif
-
 BUILDTREE_VARS += HOTSPOT_RELEASE_VERSION=$(HS_BUILD_VER) HOTSPOT_BUILD_VERSION=  JRE_RELEASE_VERSION=$(JRE_RELEASE_VERSION)
 
 BUILDTREE	= \
@@ -353,12 +339,10 @@
 	$(BUILDTREE_COMMENT); \
 	[ -n "$$JAVA_HOME" ] && { echo ": \$${JAVA_HOME:=$${JAVA_HOME}}"; }; \
 	{ \
-	echo "LD_LIBRARY_PATH=.:$${LD_LIBRARY_PATH:+$$LD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
-	echo "DYLD_LIBRARY_PATH=.:$${DYLD_LIBRARY_PATH:+$$DYLD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
 	echo "CLASSPATH=$${CLASSPATH:+$$CLASSPATH:}.:\$${JAVA_HOME}/jre/lib/rt.jar:\$${JAVA_HOME}/jre/lib/i18n.jar"; \
 	} | sed s:$${JAVA_HOME:--------}:\$${JAVA_HOME}:g; \
 	echo "HOTSPOT_BUILD_USER=\"$${LOGNAME:-$$USER} in `basename $(GAMMADIR)`\""; \
-	echo "export JAVA_HOME LD_LIBRARY_PATH DYLD_LIBRARY_PATH CLASSPATH HOTSPOT_BUILD_USER"; \
+	echo "export JAVA_HOME CLASSPATH HOTSPOT_BUILD_USER"; \
 	) > $@
 
 env.csh: env.sh
@@ -412,7 +396,7 @@
 JAVA_FLAG/64 = -d64
 
 WRONG_DATA_MODE_MSG = \
-	echo "JAVA_HOME must point to $(DATA_MODE)bit JDK."
+	echo "JAVA_HOME must point to a $(DATA_MODE)-bit OpenJDK."
 
 CROSS_COMPILING_MSG = \
 	echo "Cross compiling for ARCH $(CROSS_COMPILE_ARCH), skipping gamma run."
@@ -420,20 +404,78 @@
 test_gamma:  $(BUILDTREE_MAKE) $(GAMMADIR)/make/test/Queens.java
 	@echo Creating $@ ...
 	$(QUIETLY) ( \
-	echo '#!/bin/sh'; \
+	echo "#!/bin/sh"; \
+	echo ""; \
 	$(BUILDTREE_COMMENT); \
-	echo '. ./env.sh'; \
-	echo "if [ \"$(CROSS_COMPILE_ARCH)\" != \"\" ]; then { $(CROSS_COMPILING_MSG); exit 0; }; fi"; \
-	echo "if [ -z \$$JAVA_HOME ]; then { $(NO_JAVA_HOME_MSG); exit 0; }; fi"; \
-	echo "if ! \$${JAVA_HOME}/bin/java $(JAVA_FLAG) -fullversion 2>&1 > /dev/null"; \
-	echo "then"; \
-	echo "  $(WRONG_DATA_MODE_MSG); exit 0;"; \
+	echo ""; \
+	echo "# Include environment settings for gamma run"; \
+	echo ""; \
+	echo ". ./env.sh"; \
+	echo ""; \
+	echo "# Do not run gamma test for cross compiles"; \
+	echo ""; \
+	echo "if [ -n \"$(CROSS_COMPILE_ARCH)\" ]; then "; \
+	echo "  $(CROSS_COMPILING_MSG)"; \
+	echo "  exit 0"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Make sure JAVA_HOME is set as it is required for gamma"; \
+	echo ""; \
+	echo "if [ -z \"\$${JAVA_HOME}\" ]; then "; \
+	echo "  $(NO_JAVA_HOME_MSG)"; \
+	echo "  exit 0"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Check JAVA_HOME version to be used for the test"; \
+	echo ""; \
+	echo "\$${JAVA_HOME}/bin/java $(JAVA_FLAG) -fullversion > /dev/null 2>&1"; \
+	echo "if [ \$$? -ne 0 ]; then "; \
+	echo "  $(WRONG_DATA_MODE_MSG)"; \
+	echo "  exit 0"; \
 	echo "fi"; \
+	echo ""; \
+	echo "# Use gamma_g if it exists"; \
+	echo ""; \
+	echo "GAMMA_PROG=gamma"; \
+	echo "if [ -f gamma_g ]; then "; \
+	echo "  GAMMA_PROG=gamma_g"; \
+	echo "fi"; \
+	echo ""; \
+	echo "if [ \"$(OS_VENDOR)\" = \"Darwin\" ]; then "; \
+	echo "  # Ensure architecture for gamma and JAVA_HOME is the same."; \
+	echo "  # NOTE: gamma assumes the OpenJDK directory layout."; \
+	echo ""; \
+	echo "  GAMMA_ARCH=\"\`file \$${GAMMA_PROG} | awk '{print \$$NF}'\`\""; \
+	echo "  JVM_LIB=\"\$${JAVA_HOME}/jre/lib/libjava.$(LIBRARY_SUFFIX)\""; \
+	echo "  if [ ! -f \$${JVM_LIB} ]; then"; \
+	echo "    JVM_LIB=\"\$${JAVA_HOME}/jre/lib/$${LIBARCH}/libjava.$(LIBRARY_SUFFIX)\""; \
+	echo "  fi"; \
+	echo "  if [ ! -f \$${JVM_LIB} ] || [ -z \"\`file \$${JVM_LIB} | grep \$${GAMMA_ARCH}\`\" ]; then "; \
+	echo "    $(WRONG_DATA_MODE_MSG)"; \
+	echo "    exit 0"; \
+	echo "  fi"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Compile Queens program for test"; \
+	echo ""; \
 	echo "rm -f Queens.class"; \
 	echo "\$${JAVA_HOME}/bin/javac -d . $(GAMMADIR)/make/test/Queens.java"; \
-	echo '[ -f gamma_g ] && { gamma=gamma_g; }'; \
-	echo './$${gamma:-gamma} $(TESTFLAGS) Queens < /dev/null'; \
-	$(TEST_GAMMA_STATUS) \
+	echo ""; \
+	echo "# Set library path solely for gamma launcher test run"; \
+	echo ""; \
+	echo "LD_LIBRARY_PATH=.:$${LD_LIBRARY_PATH:+$$LD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
+	echo "export LD_LIBRARY_PATH"; \
+	echo "unset LD_LIBRARY_PATH_32"; \
+	echo "unset LD_LIBRARY_PATH_64"; \
+	echo ""; \
+	echo "if [ \"$(OS_VENDOR)\" = \"Darwin\" ]; then "; \
+	echo "  DYLD_LIBRARY_PATH=.:$${DYLD_LIBRARY_PATH:+$$DYLD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/native_threads:\$${JAVA_HOME}/jre/lib:$${DYLD_LIBRARY_PATH:+$$DYLD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
+	echo "  export DYLD_LIBRARY_PATH"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Use the gamma launcher and JAVA_HOME to run the test"; \
+	echo ""; \
+	echo "./\$${GAMMA_PROG} $(TESTFLAGS) Queens < /dev/null"; \
 	) > $@
 	$(QUIETLY) chmod +x $@
 
--- a/make/bsd/makefiles/defs.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/defs.make	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -142,6 +142,7 @@
 # client and server subdirectories have symbolic links to ../libjsig.so
 EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
 EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
+EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
 
 ifndef BUILD_CLIENT_ONLY
 EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
@@ -150,7 +151,6 @@
 
 ifneq ($(ZERO_BUILD), true)
   ifeq ($(ARCH_DATA_MODEL), 32)
-    EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
     EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
     EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.$(LIBRARY_SUFFIX)
   endif
@@ -171,10 +171,39 @@
 
 EXPORT_LIST += $(ADD_SA_BINARIES/$(HS_ARCH))
 
-UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX)
-UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX)
-UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX)
+# Universal build settings
+ifeq ($(OS_VENDOR), Darwin)
+  # Build universal binaries by default on Mac OS X
+  MACOSX_UNIVERSAL = true
+  ifneq ($(ALT_MACOSX_UNIVERSAL),)
+    MACOSX_UNIVERSAL = $(ALT_MACOSX_UNIVERSAL)
+  endif
+  MAKE_ARGS += MACOSX_UNIVERSAL=$(MACOSX_UNIVERSAL)
+
+  # Universal settings
+  ifeq ($(MACOSX_UNIVERSAL), true)
+
+    # Set universal export path but avoid using ARCH or PLATFORM subdirs
+    EXPORT_PATH=$(OUTPUTDIR)/export-universal$(EXPORT_SUBDIR)
+    ifneq ($(ALT_EXPORT_PATH),)
+      EXPORT_PATH=$(ALT_EXPORT_PATH)
+    endif
 
-UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/Xusage.txt
-UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/Xusage.txt
-UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX)
+    # Set universal image dir
+    JDK_IMAGE_DIR=$(OUTPUTDIR)/jdk-universal$(EXPORT_SUBDIR)
+    ifneq ($(ALT_JDK_IMAGE_DIR),)
+      JDK_IMAGE_DIR=$(ALT_JDK_IMAGE_DIR)
+    endif
+
+    # Binaries to 'universalize' if built
+    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX)
+
+    # Files to simply copy in place
+    UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/Xusage.txt
+    UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/Xusage.txt
+
+  endif
+endif
--- a/make/bsd/makefiles/dtrace.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/dtrace.make	Mon Feb 27 15:06:36 2012 -0800
@@ -105,11 +105,11 @@
 
 lib$(GENOFFS).dylib: $(DTRACE_SRCDIR)/$(GENOFFS).cpp $(DTRACE_SRCDIR)/$(GENOFFS).h \
                   $(LIBJVM.o)
-	$(QUIETLY) $(CCC) $(CPPFLAGS) $(GENOFFS_CFLAGS) $(SHARED_FLAG) $(PICFLAG) \
+	$(QUIETLY) $(CXX) $(CXXFLAGS) $(GENOFFS_CFLAGS) $(SHARED_FLAG) $(PICFLAG) \
 		 $(LFLAGS_GENOFFS) -o $@ $(DTRACE_SRCDIR)/$(GENOFFS).cpp -ljvm
 
 $(GENOFFS): $(DTRACE_SRCDIR)/$(GENOFFS)Main.c lib$(GENOFFS).dylib
-	$(QUIETLY) $(LINK.CC) -o $@ $(DTRACE_SRCDIR)/$(GENOFFS)Main.c \
+	$(QUIETLY) $(LINK.CXX) -o $@ $(DTRACE_SRCDIR)/$(GENOFFS)Main.c \
 		./lib$(GENOFFS).dylib
 
 # $@.tmp is created first to avoid an empty $(JVMOFFS).h if an error occurs.
@@ -135,7 +135,7 @@
 	fi
 
 $(JVMOFFS.o): $(JVMOFFS).h $(JVMOFFS).cpp 
-	$(QUIETLY) $(CCC) -c -I. -o $@ $(ARCHFLAG) -D$(TYPE) $(JVMOFFS).cpp
+	$(QUIETLY) $(CXX) -c -I. -o $@ $(ARCHFLAG) -D$(TYPE) $(JVMOFFS).cpp
 
 $(LIBJVM_DB): $(DTRACE_SRCDIR)/$(JVM_DB).c $(JVMOFFS.o) $(XLIBJVM_DB) $(LIBJVM_DB_MAPFILE)
 	@echo Making $@
--- a/make/bsd/makefiles/gcc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/gcc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -25,20 +25,19 @@
 OS_VENDOR = $(shell uname -s)
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
 # When cross-compiling the ALT_COMPILER_PATH points
 # to the cross-compilation toolset
 ifdef CROSS_COMPILE_ARCH
- CPP = $(ALT_COMPILER_PATH)/g++
+ CXX = $(ALT_COMPILER_PATH)/g++
  CC  = $(ALT_COMPILER_PATH)/gcc
- HOSTCPP = g++
+ HOSTCXX = g++
  HOSTCC  = gcc
 else ifneq ($(OS_VENDOR), Darwin)
  CXX = g++
- CPP = $(CXX)
  CC  = gcc
- HOSTCPP = $(CPP)
+ HOSTCXX = $(CXX)
  HOSTCC  = $(CC)
 endif
 
@@ -53,7 +52,6 @@
   ifeq ($(origin CC), default)
    CC  = llvm-gcc
   endif
-  CPP  = $(CXX)
 
   ifeq ($(ARCH), i486)
   LLVM_SUPPORTS_STACKREALIGN := $(shell \
@@ -67,11 +65,11 @@
     CXX32 ?= g++-4.0
     CC32  ?= gcc-4.0
   endif
-  CPP = $(CXX32)
+  CXX = $(CXX32)
   CC  = $(CC32)
   endif
 
-  HOSTCPP = $(CPP)
+  HOSTCXX = $(CXX)
   HOSTCC  = $(CC)
 endif
 
--- a/make/bsd/makefiles/launcher.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/launcher.make	Mon Feb 27 15:06:36 2012 -0800
@@ -50,14 +50,31 @@
   LIBS_LAUNCHER             += $(STATIC_STDCXX) $(LIBS)
 else
   LAUNCHER.o                 = launcher.o
-  LFLAGS_LAUNCHER           += -L`pwd`
+  LFLAGS_LAUNCHER           += -L`pwd` 
+
+  # The gamma launcher runs the JDK from $JAVA_HOME, overriding the JVM with a
+  # freshly built JVM at ./libjvm.{so|dylib}.  This is accomplished by setting
+  # the library search path ({DY}LD_LIBRARY_PATH) so that the local JVM is found
+  # first.  Gamma dlopen()s libjava from $JAVA_HOME/jre/lib{/$arch}, which is
+  # statically linked with CoreFoundation framework libs.  Unfortunately, gamma's
+  # unique search path results in some unresolved symbols in the framework
+  # libraries, because JDK libraries, e.g. libjpeg, are inadvertently discovered
+  # first on the search path.  On Mac OS X, filenames are case *insensitive*,
+  # so the actual filename collision is between libjpeg.dylib and libJPEG.dylib.
+  # To resolve this, gamma also needs to statically link with the CoreFoundation
+  # framework libraries (see -framework CoreFoundation below).
+
+  ifeq ($(OS_VENDOR),Darwin)
+    LFLAGS_LAUNCHER         += -framework CoreFoundation 
+  endif
+
   LIBS_LAUNCHER             += -l$(JVM) $(LIBS)
 endif
 
-LINK_LAUNCHER = $(LINK.c)
+LINK_LAUNCHER = $(LINK.CC)
 
-LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CC/PRE_HOOK)
-LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CC/POST_HOOK)
+LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CXX/PRE_HOOK)
+LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CXX/POST_HOOK)
 
 LAUNCHER_OUT = launcher
 
@@ -73,11 +90,11 @@
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR_SHARE)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER): $(OBJS) $(LIBJVM) $(LAUNCHER_MAPFILE)
 	$(QUIETLY) echo Linking launcher...
--- a/make/bsd/makefiles/product.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/product.make	Mon Feb 27 15:06:36 2012 -0800
@@ -55,4 +55,4 @@
 STRIP_AOUT   = $(STRIP) -x $@ || exit 1;
 
 # Don't strip in VM build; JDK build will strip libraries later
-# LINK_LIB.CC/POST_HOOK += $(STRIP_$(LINK_INTO))
+# LINK_LIB.CXX/POST_HOOK += $(STRIP_$(LINK_INTO))
--- a/make/bsd/makefiles/rules.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/rules.make	Mon Feb 27 15:06:36 2012 -0800
@@ -27,52 +27,39 @@
 # Tell make that .cpp is important
 .SUFFIXES: .cpp $(SUFFIXES)
 
-# For now.  Other makefiles use CPP as the c++ compiler, but that should really
-# name the preprocessor.
-ifeq    ($(CCC),)
-CCC             = $(CPP)
-endif
-
 DEMANGLER       = c++filt
 DEMANGLE        = $(DEMANGLER) < $@ > .$@ && mv -f .$@ $@
 
-# $(CC) is the c compiler (cc/gcc), $(CCC) is the c++ compiler (CC/g++).
-C_COMPILE       = $(CC) $(CPPFLAGS) $(CFLAGS)
-CC_COMPILE      = $(CCC) $(CPPFLAGS) $(CFLAGS)
+# $(CC) is the c compiler (cc/gcc), $(CXX) is the c++ compiler (CC/g++).
+CC_COMPILE       = $(CC) $(CXXFLAGS) $(CFLAGS)
+CXX_COMPILE      = $(CXX) $(CXXFLAGS) $(CFLAGS)
 
 AS.S            = $(AS) $(ASFLAGS)
 
-COMPILE.c       = $(C_COMPILE) -c
-GENASM.c        = $(C_COMPILE) -S
-LINK.c          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_LIB.c      = $(CC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.c    = $(C_COMPILE) -E
+COMPILE.CC       = $(CC_COMPILE) -c
+GENASM.CC        = $(CC_COMPILE) -S
+LINK.CC          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_LIB.CC      = $(CC) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CC    = $(CC_COMPILE) -E
 
-COMPILE.CC      = $(CC_COMPILE) -c
-GENASM.CC       = $(CC_COMPILE) -S
-LINK.CC         = $(CCC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_NOPROF.CC  = $(CCC) $(LFLAGS) $(AOUT_FLAGS)
-LINK_LIB.CC     = $(CCC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.CC   = $(CC_COMPILE) -E
+COMPILE.CXX      = $(CXX_COMPILE) -c
+GENASM.CXX       = $(CXX_COMPILE) -S
+LINK.CXX         = $(CXX) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_NOPROF.CXX  = $(CXX) $(LFLAGS) $(AOUT_FLAGS)
+LINK_LIB.CXX     = $(CXX) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CXX   = $(CXX_COMPILE) -E
 
 # cross compiling the jvm with c2 requires host compilers to build
 # adlc tool
 
-HOST.CC_COMPILE      = $(HOSTCPP) $(CPPFLAGS) $(CFLAGS)
-HOST.COMPILE.CC      = $(HOST.CC_COMPILE) -c
-HOST.LINK_NOPROF.CC  = $(HOSTCPP) $(LFLAGS) $(AOUT_FLAGS)
+HOST.CXX_COMPILE      = $(HOSTCXX) $(CXXFLAGS) $(CFLAGS)
+HOST.COMPILE.CXX      = $(HOST.CXX_COMPILE) -c
+HOST.LINK_NOPROF.CXX  = $(HOSTCXX) $(LFLAGS) $(AOUT_FLAGS)
 
 
 # Effect of REMOVE_TARGET is to delete out-of-date files during "gnumake -k".
 REMOVE_TARGET   = rm -f $@
 
-# Synonyms.
-COMPILE.cpp     = $(COMPILE.CC)
-GENASM.cpp      = $(GENASM.CC)
-LINK.cpp        = $(LINK.CC)
-LINK_LIB.cpp    = $(LINK_LIB.CC)
-PREPROCESS.cpp  = $(PREPROCESS.CC)
-
 # Note use of ALT_BOOTDIR to explicitly specify location of java and
 # javac; this is the same environment variable used in the J2SE build
 # process for overriding the default spec, which is BOOTDIR.
@@ -161,14 +148,14 @@
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
 else
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
 	$(QUIETLY) $(if $(findstring $@, $(NONPIC_OBJ_FILES)), \
-	   $(subst $(VM_PICFLAG), ,$(COMPILE.CC)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
-	   $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
+	   $(subst $(VM_PICFLAG), ,$(COMPILE.CXX)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
+	   $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
 endif
 
 %.o: %.s
@@ -178,13 +165,13 @@
 
 %.s: %.cpp
 	@echo Generating assembly for $<
-	$(QUIETLY) $(GENASM.CC) -o $@ $<
+	$(QUIETLY) $(GENASM.CXX) -o $@ $<
 	$(QUIETLY) $(DEMANGLE) $(COMPILE_DONE)
 
 # Intermediate files (for debugging macros)
 %.i: %.cpp
 	@echo Preprocessing $< to $@
-	$(QUIETLY) $(PREPROCESS.CC) $< > $@ $(COMPILE_DONE)
+	$(QUIETLY) $(PREPROCESS.CXX) $< > $@ $(COMPILE_DONE)
 
 #  Override gnumake built-in rules which do sccs get operations badly.
 #  (They put the checked out code in the current directory, not in the
--- a/make/bsd/makefiles/sparcWorks.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/sparcWorks.make	Mon Feb 27 15:06:36 2012 -0800
@@ -23,13 +23,13 @@
 #
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
-CPP = CC
+CXX = CC
 CC  = cc
 AS  = $(CC) -c
 
-HOSTCPP = $(CPP)
+HOSTCXX = $(CXX)
 HOSTCC  = $(CC)
 
 ARCHFLAG = $(ARCHFLAG/$(BUILDARCH))
--- a/make/bsd/makefiles/top.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/top.make	Mon Feb 27 15:06:36 2012 -0800
@@ -124,8 +124,8 @@
 	@$(UpdatePCH)
 	@$(MAKE) -f vm.make $(MFLAGS-adjusted)
 
-install: the_vm
-	@$(MAKE) -f vm.make install
+install gamma: the_vm
+	@$(MAKE) -f vm.make $@
 
 # next rules support "make foo.[ois]"
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/bsd/makefiles/universal.gmk	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,113 @@
+#
+# Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#  
+#
+
+# macosx universal builds
+universal_product:
+	$(MAKE) MACOSX_UNIVERSAL=true all_product_universal
+universal_fastdebug:
+	$(MAKE) MACOSX_UNIVERSAL=true all_fastdebug_universal
+universal_debug:
+	$(MAKE) MACOSX_UNIVERSAL=true all_debug_universal
+
+
+# Universal builds include 1 or more architectures in a single binary
+all_product_universal:
+#	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 $(COMMON_VM_PRODUCT_TARGETS)
+	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 $(COMMON_VM_PRODUCT_TARGETS)
+	$(QUIETLY) $(MAKE) EXPORT_SUBDIR= universalize
+all_fastdebug_universal:
+#	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 $(COMMON_VM_FASTDEBUG_TARGETS)
+	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 $(COMMON_VM_FASTDEBUG_TARGETS)
+	$(QUIETLY) $(MAKE) EXPORT_SUBDIR=/fastdebug universalize
+all_debug_universal:
+#	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=32 $(COMMON_VM_DEBUG_TARGETS)
+	$(QUIETLY) $(MAKE) ARCH_DATA_MODEL=64 $(COMMON_VM_DEBUG_TARGETS)
+	$(QUIETLY) $(MAKE) EXPORT_SUBDIR=/debug universalize
+
+
+# Consolidate architecture builds into a single Universal binary
+universalize: $(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
+	$(RM) -r $(EXPORT_PATH)/jre/lib/{i386,amd64}
+
+
+# Package built libraries in a universal binary
+$(UNIVERSAL_LIPO_LIST):
+	BUILT_LIPO_FILES="`find $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) 2>/dev/null`"; \
+	if [ -n "$${BUILT_LIPO_FILES}" ]; then \
+	  $(MKDIR) -p $(shell dirname $@); \
+	  lipo -create -output $@ $${BUILT_LIPO_FILES}; \
+	fi	
+
+
+# Copy built non-universal binaries in place
+$(UNIVERSAL_COPY_LIST):
+	BUILT_COPY_FILES="`find $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) 2>/dev/null`"; \
+	if [ -n "$${BUILT_COPY_FILES}" ]; then \
+	  for i in $${BUILT_COPY_FILES}; do \
+	    if [ -f $${i} ]; then \
+	      $(MKDIR) -p $(shell dirname $@); \
+	      $(CP) $${i} $@; \
+	    fi; \
+	  done; \
+	fi
+
+
+# Replace arch specific binaries with universal binaries
+export_universal:
+	$(RM) -r $(EXPORT_PATH)/jre/lib/{i386,amd64}
+	$(RM) -r $(JDK_IMAGE_DIR)/jre/lib/{i386,amd64}
+	$(RM) $(JDK_IMAGE_DIR)/jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
+	($(CD) $(EXPORT_PATH) && \
+	  $(TAR) -cf - *) | \
+	  ($(CD) $(JDK_IMAGE_DIR) && $(TAR) -xpf -)
+
+
+# Overlay universal binaries
+copy_universal:
+	$(RM) -r $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/jre/lib/{i386,amd64}
+	$(RM) $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
+	($(CD) $(EXPORT_PATH)$(COPY_SUBDIR) && \
+	  $(TAR) -cf - *) | \
+	  ($(CD) $(JDK_IMAGE_DIR)$(COPY_SUBDIR) && $(TAR) -xpf -)
+
+
+# Additional processing for universal builds
+export_product_jdk::
+	$(MAKE) EXPORT_SUBDIR=           export_universal
+export_optimized_jdk::
+	$(MAKE) EXPORT_SUBDIR=           export_universal
+export_fastdebug_jdk::
+	$(MAKE) EXPORT_SUBDIR=/fastdebug export_universal
+export_debug_jdk::
+	$(MAKE) EXPORT_SUBDIR=/debug     export_universal
+copy_product_jdk::
+	$(MAKE) COPY_SUBDIR=             copy_universal
+copy_fastdebug_jdk::
+	$(MAKE) COPY_SUBDIR=/fastdebug   copy_universal
+copy_debug_jdk::
+	$(MAKE) COPY_SUBDIR=/debug       copy_universal
+
+.PHONY:	universal_product universal_fastdebug universal_debug \
+	all_product_universal all_fastdebug_universal all_debug_universal \
+	universalize export_universal copy_universal
--- a/make/bsd/makefiles/vm.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/bsd/makefiles/vm.make	Mon Feb 27 15:06:36 2012 -0800
@@ -82,18 +82,26 @@
 BUILD_USER    = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\""
 VM_DISTRO     = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\""
 
-CPPFLAGS =           \
+CXXFLAGS =           \
   ${SYSDEFS}         \
   ${INCLUDES}        \
   ${BUILD_VERSION}   \
   ${BUILD_TARGET}    \
   ${BUILD_USER}      \
   ${HS_LIB_ARCH}     \
-  ${JRE_VERSION}     \
   ${VM_DISTRO}
 
+# This is VERY important! The version define must only be supplied to vm_version.o.
+# Otherwise every object file's command line would carry the version string, which might
+# contain a time and date, and ccache would not be able to re-use its cache at all.
+vm_version.o: CXXFLAGS += ${JRE_VERSION} 
+
 ifdef DEFAULT_LIBPATH
-CPPFLAGS += -DDEFAULT_LIBPATH="\"$(DEFAULT_LIBPATH)\""
+CXXFLAGS += -DDEFAULT_LIBPATH="\"$(DEFAULT_LIBPATH)\""
+endif
+
+ifndef JAVASE_EMBEDDED
+CFLAGS += -DINCLUDE_TRACE
 endif
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
@@ -147,6 +155,12 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
 
+ifndef JAVASE_EMBEDDED
+SOURCE_PATHS+=$(shell if [ -d $(HS_ALT_SRC)/share/vm/jfr ]; then \
+  find $(HS_ALT_SRC)/share/vm/jfr -type d; \
+  fi)
+endif
+
 CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path))
 CORE_PATHS+=$(GENERATED)/jvmtifiles
 
@@ -258,9 +272,9 @@
   ifeq ($(STATIC_CXX), true)
     LFLAGS_VM              += $(STATIC_LIBGCC)
     LIBS_VM                += $(STATIC_STDCXX)
-    LINK_VM                = $(LINK_LIB.c)
+    LINK_VM                = $(LINK_LIB.CC)
   else
-    LINK_VM                = $(LINK_LIB.CC)
+    LINK_VM                = $(LINK_LIB.CXX)
   endif
 
   LIBS_VM                  += $(LIBS)
@@ -278,7 +292,7 @@
 $(PRECOMPILED_HEADER):
 	$(QUIETLY) echo Generating precompiled header $@
 	$(QUIETLY) mkdir -p $(PRECOMPILED_HEADER_DIR)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -x c++-header $(PRECOMPILED_HEADER_SRC) -o $@ $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -x c++-header $(PRECOMPILED_HEADER_SRC) -o $@ $(COMPILE_DONE)
 
 # making the library:
 
@@ -303,10 +317,10 @@
 $(LIBJVM): $(LIBJVM.o) $(LIBJVM_MAPFILE) $(LD_SCRIPT)
 	$(QUIETLY) {                                                    \
 	    echo Linking vm...;                                         \
-	    $(LINK_LIB.CC/PRE_HOOK)                                     \
+	    $(LINK_LIB.CXX/PRE_HOOK)                                     \
 	    $(LINK_VM) $(LD_SCRIPT_FLAG)                                \
 		       $(LFLAGS_VM) -o $@ $(LIBJVM.o) $(LIBS_VM);       \
-	    $(LINK_LIB.CC/POST_HOOK)                                    \
+	    $(LINK_LIB.CXX/POST_HOOK)                                    \
 	    rm -f $@.1; ln -s $@ $@.1;                                  \
 	    [ -f $(LIBJVM_G) ] || { ln -s $@ $(LIBJVM_G); ln -s $@.1 $(LIBJVM_G).1; }; \
 	}
@@ -335,8 +349,8 @@
 $(LIBJVM).dSYM: $(LIBJVM)
 	dsymutil $(LIBJVM)
 
-# no launcher or libjvm_db for macosx
-build: $(LIBJVM) $(LIBJSIG) $(BUILDLIBSAPROC) dtraceCheck $(LIBJVM).dSYM
+# no libjvm_db for macosx
+build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(BUILDLIBSAPROC) dtraceCheck $(LIBJVM).dSYM
 	echo "Doing vm.make build:"
 else
 build: $(LIBJVM) $(LAUNCHER) $(LIBJSIG) $(LIBJVM_DB) $(BUILDLIBSAPROC)
--- a/make/defs.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/defs.make	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -185,6 +185,18 @@
   BOOTDIR=$(ALT_BOOTDIR)
 endif
 
+# Select name of the export directory and honor ALT overrides
+EXPORT_PATH=$(OUTPUTDIR)/export-$(PLATFORM)$(EXPORT_SUBDIR)
+ifneq ($(ALT_EXPORT_PATH),)
+  EXPORT_PATH=$(ALT_EXPORT_PATH)
+endif
+
+# Default jdk image if one is created for you with create_jdk
+JDK_IMAGE_DIR=$(OUTPUTDIR)/jdk-$(PLATFORM)
+ifneq ($(ALT_JDK_IMAGE_DIR),)
+  JDK_IMAGE_DIR=$(ALT_JDK_IMAGE_DIR)
+endif
+
 # The platform dependent defs.make defines platform specific variable such 
 # as ARCH, EXPORT_LIST etc. We must place the include here after BOOTDIR is defined.
 include $(GAMMADIR)/make/$(OSNAME)/makefiles/defs.make
@@ -263,15 +275,6 @@
 # includes this make/defs.make file.
 MAKE_ARGS += HOTSPOT_BUILD_VERSION=$(HOTSPOT_BUILD_VERSION)
 
-# Select name of export directory
-EXPORT_PATH=$(OUTPUTDIR)/export-$(PLATFORM)$(EXPORT_SUBDIR)
-ifneq ($(ALT_EXPORT_PATH),)
-  EXPORT_PATH=$(ALT_EXPORT_PATH)
-endif
-
-# Default jdk image if one is created for you with create_jdk
-JDK_IMAGE_DIR=$(OUTPUTDIR)/jdk-$(PLATFORM)
-
 # Various export sub directories
 EXPORT_INCLUDE_DIR = $(EXPORT_PATH)/include
 EXPORT_DOCS_DIR = $(EXPORT_PATH)/docs
@@ -294,3 +297,7 @@
 EXPORT_LIST += $(EXPORT_INCLUDE_DIR)/jni.h
 EXPORT_LIST += $(EXPORT_INCLUDE_DIR)/$(JDK_INCLUDE_SUBDIR)/jni_md.h
 EXPORT_LIST += $(EXPORT_INCLUDE_DIR)/jmm.h
+
+ifndef JAVASE_EMBEDDED
+EXPORT_LIST += $(EXPORT_INCLUDE_DIR)/jfr.h
+endif
--- a/make/hotspot_version	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/hotspot_version	Mon Feb 27 15:06:36 2012 -0800
@@ -33,9 +33,9 @@
 # Don't put quotes (fail windows build).
 HOTSPOT_VM_COPYRIGHT=Copyright 2011
 
-HS_MAJOR_VER=23
+HS_MAJOR_VER=24
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=08
+HS_BUILD_NUMBER=01
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/jprt.properties	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/jprt.properties	Mon Feb 27 15:06:36 2012 -0800
@@ -38,7 +38,9 @@
 
 # This tells jprt what default release we want to build
 
-jprt.tools.default.release=${jprt.submit.release}
+jprt.hotspot.default.release=jdk7
+
+jprt.tools.default.release=${jprt.submit.option.release?${jprt.submit.option.release}:${jprt.hotspot.default.release}}
 
 # Disable syncing the source after builds and tests are done.
 
@@ -52,154 +54,58 @@
 # Define the Solaris platforms we want for the various releases
 jprt.my.solaris.sparc.jdk8=solaris_sparc_5.10
 jprt.my.solaris.sparc.jdk7=solaris_sparc_5.10
-jprt.my.solaris.sparc.jdk7b107=solaris_sparc_5.10
-jprt.my.solaris.sparc.jdk7temp=solaris_sparc_5.10
-jprt.my.solaris.sparc.jdk6=solaris_sparc_5.8
-jprt.my.solaris.sparc.jdk6perf=solaris_sparc_5.8
-jprt.my.solaris.sparc.jdk6u10=solaris_sparc_5.8
-jprt.my.solaris.sparc.jdk6u14=solaris_sparc_5.8
-jprt.my.solaris.sparc.jdk6u18=solaris_sparc_5.8
-jprt.my.solaris.sparc.jdk6u20=solaris_sparc_5.8
-jprt.my.solaris.sparc.ejdk7=${jprt.my.solaris.sparc.jdk7}
-jprt.my.solaris.sparc.ejdk6=${jprt.my.solaris.sparc.jdk6}
 jprt.my.solaris.sparc=${jprt.my.solaris.sparc.${jprt.tools.default.release}}
 
 jprt.my.solaris.sparcv9.jdk8=solaris_sparcv9_5.10
 jprt.my.solaris.sparcv9.jdk7=solaris_sparcv9_5.10
-jprt.my.solaris.sparcv9.jdk7b107=solaris_sparcv9_5.10
-jprt.my.solaris.sparcv9.jdk7temp=solaris_sparcv9_5.10
-jprt.my.solaris.sparcv9.jdk6=solaris_sparcv9_5.8
-jprt.my.solaris.sparcv9.jdk6perf=solaris_sparcv9_5.8
-jprt.my.solaris.sparcv9.jdk6u10=solaris_sparcv9_5.8
-jprt.my.solaris.sparcv9.jdk6u14=solaris_sparcv9_5.8
-jprt.my.solaris.sparcv9.jdk6u18=solaris_sparcv9_5.8
-jprt.my.solaris.sparcv9.jdk6u20=solaris_sparcv9_5.8
-jprt.my.solaris.sparcv9.ejdk7=${jprt.my.solaris.sparcv9.jdk7}
-jprt.my.solaris.sparcv9.ejdk6=${jprt.my.solaris.sparcv9.jdk6}
 jprt.my.solaris.sparcv9=${jprt.my.solaris.sparcv9.${jprt.tools.default.release}}
 
 jprt.my.solaris.i586.jdk8=solaris_i586_5.10
 jprt.my.solaris.i586.jdk7=solaris_i586_5.10
-jprt.my.solaris.i586.jdk7b107=solaris_i586_5.10
-jprt.my.solaris.i586.jdk7temp=solaris_i586_5.10
-jprt.my.solaris.i586.jdk6=solaris_i586_5.8
-jprt.my.solaris.i586.jdk6perf=solaris_i586_5.8
-jprt.my.solaris.i586.jdk6u10=solaris_i586_5.8
-jprt.my.solaris.i586.jdk6u14=solaris_i586_5.8
-jprt.my.solaris.i586.jdk6u18=solaris_i586_5.8
-jprt.my.solaris.i586.jdk6u20=solaris_i586_5.8
-jprt.my.solaris.i586.ejdk7=${jprt.my.solaris.i586.jdk7}
-jprt.my.solaris.i586.ejdk6=${jprt.my.solaris.i586.jdk6}
 jprt.my.solaris.i586=${jprt.my.solaris.i586.${jprt.tools.default.release}}
 
 jprt.my.solaris.x64.jdk8=solaris_x64_5.10
 jprt.my.solaris.x64.jdk7=solaris_x64_5.10
-jprt.my.solaris.x64.jdk7b107=solaris_x64_5.10
-jprt.my.solaris.x64.jdk7temp=solaris_x64_5.10
-jprt.my.solaris.x64.jdk6=solaris_x64_5.10
-jprt.my.solaris.x64.jdk6perf=solaris_x64_5.10
-jprt.my.solaris.x64.jdk6u10=solaris_x64_5.10
-jprt.my.solaris.x64.jdk6u14=solaris_x64_5.10
-jprt.my.solaris.x64.jdk6u18=solaris_x64_5.10
-jprt.my.solaris.x64.jdk6u20=solaris_x64_5.10
-jprt.my.solaris.x64.ejdk7=${jprt.my.solaris.x64.jdk7}
-jprt.my.solaris.x64.ejdk6=${jprt.my.solaris.x64.jdk6}
 jprt.my.solaris.x64=${jprt.my.solaris.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.i586.jdk8=linux_i586_2.6
 jprt.my.linux.i586.jdk7=linux_i586_2.6
-jprt.my.linux.i586.jdk7b107=linux_i586_2.6
-jprt.my.linux.i586.jdk7temp=linux_i586_2.6
-jprt.my.linux.i586.jdk6=linux_i586_2.4
-jprt.my.linux.i586.jdk6perf=linux_i586_2.4
-jprt.my.linux.i586.jdk6u10=linux_i586_2.4
-jprt.my.linux.i586.jdk6u14=linux_i586_2.4
-jprt.my.linux.i586.jdk6u18=linux_i586_2.4
-jprt.my.linux.i586.jdk6u20=linux_i586_2.4
-jprt.my.linux.i586.ejdk7=linux_i586_2.6
-jprt.my.linux.i586.ejdk6=linux_i586_2.6
 jprt.my.linux.i586=${jprt.my.linux.i586.${jprt.tools.default.release}}
 
 jprt.my.linux.x64.jdk8=linux_x64_2.6
 jprt.my.linux.x64.jdk7=linux_x64_2.6
-jprt.my.linux.x64.jdk7b107=linux_x64_2.6
-jprt.my.linux.x64.jdk7temp=linux_x64_2.6
-jprt.my.linux.x64.jdk6=linux_x64_2.4
-jprt.my.linux.x64.jdk6perf=linux_x64_2.4
-jprt.my.linux.x64.jdk6u10=linux_x64_2.4
-jprt.my.linux.x64.jdk6u14=linux_x64_2.4
-jprt.my.linux.x64.jdk6u18=linux_x64_2.4
-jprt.my.linux.x64.jdk6u20=linux_x64_2.4
-jprt.my.linux.x64.ejdk7=${jprt.my.linux.x64.jdk7}
-jprt.my.linux.x64.ejdk6=${jprt.my.linux.x64.jdk6}
 jprt.my.linux.x64=${jprt.my.linux.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.ppc.jdk8=linux_ppc_2.6
 jprt.my.linux.ppc.jdk7=linux_ppc_2.6
-jprt.my.linux.ppc.jdk7b107=linux_ppc_2.6
-jprt.my.linux.ppc.jdk7temp=linux_ppc_2.6
-jprt.my.linux.ppc.ejdk6=linux_ppc_2.6
-jprt.my.linux.ppc.ejdk7=linux_ppc_2.6
 jprt.my.linux.ppc=${jprt.my.linux.ppc.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcv2.jdk8=linux_ppcv2_2.6
 jprt.my.linux.ppcv2.jdk7=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.jdk7b107=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.jdk7temp=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.ejdk6=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.ejdk7=linux_ppcv2_2.6
 jprt.my.linux.ppcv2=${jprt.my.linux.ppcv2.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcsflt.jdk8=linux_ppcsflt_2.6
 jprt.my.linux.ppcsflt.jdk7=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.jdk7b107=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.jdk7temp=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.ejdk6=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.ejdk7=linux_ppcsflt_2.6
 jprt.my.linux.ppcsflt=${jprt.my.linux.ppcsflt.${jprt.tools.default.release}}
 
 jprt.my.linux.armvfp.jdk8=linux_armvfp_2.6
 jprt.my.linux.armvfp.jdk7=linux_armvfp_2.6
-jprt.my.linux.armvfp.jdk7b107=linux_armvfp_2.6
-jprt.my.linux.armvfp.jdk7temp=linux_armvfp_2.6
-jprt.my.linux.armvfp.ejdk6=linux_armvfp_2.6
-jprt.my.linux.armvfp.ejdk7=linux_armvfp_2.6
 jprt.my.linux.armvfp=${jprt.my.linux.armvfp.${jprt.tools.default.release}}
 
 jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6
 jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6
-jprt.my.linux.armsflt.jdk7b107=linux_armsflt_2.6
-jprt.my.linux.armsflt.jdk7temp=linux_armsflt_2.6
-jprt.my.linux.armsflt.ejdk6=linux_armsflt_2.6
-jprt.my.linux.armsflt.ejdk7=linux_armsflt_2.6
 jprt.my.linux.armsflt=${jprt.my.linux.armsflt.${jprt.tools.default.release}}
 
+jprt.my.macosx.x64.jdk8=macosx_x64_10.7
+jprt.my.macosx.x64.jdk7=macosx_x64_10.7
+jprt.my.macosx.x64=${jprt.my.macosx.x64.${jprt.tools.default.release}}
+
 jprt.my.windows.i586.jdk8=windows_i586_5.1
 jprt.my.windows.i586.jdk7=windows_i586_5.1
-jprt.my.windows.i586.jdk7b107=windows_i586_5.0
-jprt.my.windows.i586.jdk7temp=windows_i586_5.0
-jprt.my.windows.i586.jdk6=windows_i586_5.0
-jprt.my.windows.i586.jdk6perf=windows_i586_5.0
-jprt.my.windows.i586.jdk6u10=windows_i586_5.0
-jprt.my.windows.i586.jdk6u14=windows_i586_5.0
-jprt.my.windows.i586.jdk6u18=windows_i586_5.0
-jprt.my.windows.i586.jdk6u20=windows_i586_5.0
-jprt.my.windows.i586.ejdk7=${jprt.my.windows.i586.jdk7}
-jprt.my.windows.i586.ejdk6=${jprt.my.windows.i586.jdk6}
 jprt.my.windows.i586=${jprt.my.windows.i586.${jprt.tools.default.release}}
 
 jprt.my.windows.x64.jdk8=windows_x64_5.2
 jprt.my.windows.x64.jdk7=windows_x64_5.2
-jprt.my.windows.x64.jdk7b107=windows_x64_5.2
-jprt.my.windows.x64.jdk7temp=windows_x64_5.2
-jprt.my.windows.x64.jdk6=windows_x64_5.2
-jprt.my.windows.x64.jdk6perf=windows_x64_5.2
-jprt.my.windows.x64.jdk6u10=windows_x64_5.2
-jprt.my.windows.x64.jdk6u14=windows_x64_5.2
-jprt.my.windows.x64.jdk6u18=windows_x64_5.2
-jprt.my.windows.x64.jdk6u20=windows_x64_5.2
-jprt.my.windows.x64.ejdk7=${jprt.my.windows.x64.jdk7}
-jprt.my.windows.x64.ejdk6=${jprt.my.windows.x64.jdk6}
 jprt.my.windows.x64=${jprt.my.windows.x64.${jprt.tools.default.release}}
 
 # Standard list of jprt build targets for this source tree
@@ -211,6 +117,7 @@
     ${jprt.my.solaris.x64}-{product|fastdebug|debug}, \
     ${jprt.my.linux.i586}-{product|fastdebug|debug}, \
     ${jprt.my.linux.x64}-{product|fastdebug}, \
+    ${jprt.my.macosx.x64}-{product|fastdebug|debug}, \
     ${jprt.my.windows.i586}-{product|fastdebug|debug}, \
     ${jprt.my.windows.x64}-{product|fastdebug|debug}
 
@@ -416,6 +323,30 @@
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_G1, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_ParOldGC
 
+jprt.my.macosx.x64.test.targets = \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jvm98, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jvm98_nontiered, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-scimark, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCBasher_default, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCBasher_SerialGC, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCBasher_G1, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_default, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-GCOld_ParOldGC
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_default, \
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_default_tiered, \
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_G1, \
+#    ${jprt.my.macosx.x64}-{product|fastdebug}-c2-jbb_ParOldGC
+
 jprt.my.windows.i586.test.targets = \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
     ${jprt.my.windows.i586}-{product|fastdebug}-c2-jvm98_nontiered, \
@@ -492,6 +423,7 @@
   ${jprt.my.solaris.x64.test.targets}, \
   ${jprt.my.linux.i586.test.targets}, \
   ${jprt.my.linux.x64.test.targets}, \
+  ${jprt.my.macosx.x64.test.targets}, \
   ${jprt.my.windows.i586.test.targets}, \
   ${jprt.my.windows.x64.test.targets}, \
   ${jprt.test.targets.open}
@@ -509,16 +441,6 @@
 
 jprt.test.targets.jdk8=${jprt.test.targets.standard}
 jprt.test.targets.jdk7=${jprt.test.targets.standard}
-jprt.test.targets.jdk7temp=${jprt.test.targets.standard}
-jprt.test.targets.jdk7b105=${jprt.test.targets.standard}
-jprt.test.targets.jdk6=${jprt.test.targets.standard}
-jprt.test.targets.jdk6perf=${jprt.test.targets.standard}
-jprt.test.targets.jdk6u10=${jprt.test.targets.standard}
-jprt.test.targets.jdk6u14=${jprt.test.targets.standard}
-jprt.test.targets.jdk6u18=${jprt.test.targets.standard}
-jprt.test.targets.jdk6u20=${jprt.test.targets.standard}
-jprt.test.targets.ejdk6=${jprt.test.targets.embedded}
-jprt.test.targets.ejdk7=${jprt.test.targets.embedded}
 jprt.test.targets=${jprt.test.targets.${jprt.tools.default.release}}
 
 # The default test/Makefile targets that should be run
@@ -538,6 +460,7 @@
   ${jprt.my.solaris.x64}-*-c2-servertest, \
   ${jprt.my.linux.i586}-*-c2-servertest, \
   ${jprt.my.linux.x64}-*-c2-servertest, \
+  ${jprt.my.macosx.x64}-*-c2-servertest, \
   ${jprt.my.windows.i586}-*-c2-servertest, \
   ${jprt.my.windows.x64}-*-c2-servertest
 
@@ -548,6 +471,7 @@
   ${jprt.my.solaris.x64}-fastdebug-c2-internalvmtests, \
   ${jprt.my.linux.i586}-fastdebug-c2-internalvmtests, \
   ${jprt.my.linux.x64}-fastdebug-c2-internalvmtests, \
+  ${jprt.my.macosx.x64}-fastdebug-c2-internalvmtests, \
   ${jprt.my.windows.i586}-fastdebug-c2-internalvmtests, \
   ${jprt.my.windows.x64}-fastdebug-c2-internalvmtests
   
@@ -561,15 +485,5 @@
 
 jprt.make.rule.test.targets.jdk8=${jprt.make.rule.test.targets.standard}
 jprt.make.rule.test.targets.jdk7=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk7temp=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk7b107=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk6=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk6perf=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk6u10=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk6u14=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk6u18=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk6u20=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.ejdk6=${jprt.make.rule.test.targets.embedded}
-jprt.make.rule.test.targets.ejdk7=${jprt.make.rule.test.targets.embedded}
 jprt.make.rule.test.targets=${jprt.make.rule.test.targets.${jprt.tools.default.release}}
 
--- a/make/linux/makefiles/adlc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/adlc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -39,9 +39,16 @@
 
 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 
 
-SOURCES.AD = \
+ifeq ("${Platform_arch_model}", "${Platform_arch}")
+  SOURCES.AD = \
   $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
   $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+else
+  SOURCES.AD = \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+endif
 
 EXEC	= $(OUTDIR)/adlc
 
@@ -54,10 +61,10 @@
 INCLUDES += $(Src_Dirs_I:%=-I%)
 
 # set flags for adlc compilation
-CPPFLAGS = $(SYSDEFS) $(INCLUDES)
+CXXFLAGS = $(SYSDEFS) $(INCLUDES)
 
 # Force assertions on.
-CPPFLAGS += -DASSERT
+CXXFLAGS += -DASSERT
 
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 # Compiler warnings are treated as errors
@@ -102,7 +109,7 @@
 
 $(EXEC) : $(OBJECTS)
 	@echo Making adlc
-	$(QUIETLY) $(HOST.LINK_NOPROF.CC) -o $(EXEC) $(OBJECTS)
+	$(QUIETLY) $(HOST.LINK_NOPROF.CXX) -o $(EXEC) $(OBJECTS)
 
 # Random dependencies:
 $(OBJECTS): opcodes.hpp classes.hpp adlc.hpp adlcVMDeps.hpp adlparse.hpp archDesc.hpp arena.hpp dict2.hpp filebuff.hpp forms.hpp formsopt.hpp formssel.hpp
@@ -204,14 +211,14 @@
 $(OUTDIR)/%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(HOST.COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # Some object files are given a prefix, to disambiguate
 # them from objects of the same name built for the VM.
 $(OUTDIR)/adlc-%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(HOST.COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(HOST.COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # #########################################################################
 
--- a/make/linux/makefiles/buildtree.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/buildtree.make	Mon Feb 27 15:06:36 2012 -0800
@@ -326,11 +326,10 @@
 	$(BUILDTREE_COMMENT); \
 	[ -n "$$JAVA_HOME" ] && { echo ": \$${JAVA_HOME:=$${JAVA_HOME}}"; }; \
 	{ \
-	echo "LD_LIBRARY_PATH=.:$${LD_LIBRARY_PATH:+$$LD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
 	echo "CLASSPATH=$${CLASSPATH:+$$CLASSPATH:}.:\$${JAVA_HOME}/jre/lib/rt.jar:\$${JAVA_HOME}/jre/lib/i18n.jar"; \
 	} | sed s:$${JAVA_HOME:--------}:\$${JAVA_HOME}:g; \
 	echo "HOTSPOT_BUILD_USER=\"$${LOGNAME:-$$USER} in `basename $(GAMMADIR)`\""; \
-	echo "export JAVA_HOME LD_LIBRARY_PATH CLASSPATH HOTSPOT_BUILD_USER"; \
+	echo "export JAVA_HOME CLASSPATH HOTSPOT_BUILD_USER"; \
 	) > $@
 
 env.csh: env.sh
@@ -384,7 +383,7 @@
 JAVA_FLAG/64 = -d64
 
 WRONG_DATA_MODE_MSG = \
-	echo "JAVA_HOME must point to $(DATA_MODE)bit JDK."
+	echo "JAVA_HOME must point to a $(DATA_MODE)-bit OpenJDK."
 
 CROSS_COMPILING_MSG = \
 	echo "Cross compiling for ARCH $(CROSS_COMPILE_ARCH), skipping gamma run."
@@ -392,19 +391,78 @@
 test_gamma:  $(BUILDTREE_MAKE) $(GAMMADIR)/make/test/Queens.java
 	@echo Creating $@ ...
 	$(QUIETLY) ( \
-	echo '#!/bin/sh'; \
+	echo "#!/bin/sh"; \
+	echo ""; \
 	$(BUILDTREE_COMMENT); \
-	echo '. ./env.sh'; \
-	echo "if [ \"$(CROSS_COMPILE_ARCH)\" != \"\" ]; then { $(CROSS_COMPILING_MSG); exit 0; }; fi"; \
-	echo "if [ -z \$$JAVA_HOME ]; then { $(NO_JAVA_HOME_MSG); exit 0; }; fi"; \
-	echo "if ! \$${JAVA_HOME}/bin/java $(JAVA_FLAG) -fullversion 2>&1 > /dev/null"; \
-	echo "then"; \
-	echo "  $(WRONG_DATA_MODE_MSG); exit 0;"; \
+	echo ""; \
+	echo "# Include environment settings for gamma run"; \
+	echo ""; \
+	echo ". ./env.sh"; \
+	echo ""; \
+	echo "# Do not run gamma test for cross compiles"; \
+	echo ""; \
+	echo "if [ -n \"$(CROSS_COMPILE_ARCH)\" ]; then "; \
+	echo "  $(CROSS_COMPILING_MSG)"; \
+	echo "  exit 0"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Make sure JAVA_HOME is set as it is required for gamma"; \
+	echo ""; \
+	echo "if [ -z \"\$${JAVA_HOME}\" ]; then "; \
+	echo "  $(NO_JAVA_HOME_MSG)"; \
+	echo "  exit 0"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Check JAVA_HOME version to be used for the test"; \
+	echo ""; \
+	echo "\$${JAVA_HOME}/bin/java $(JAVA_FLAG) -fullversion > /dev/null 2>&1"; \
+	echo "if [ \$$? -ne 0 ]; then "; \
+	echo "  $(WRONG_DATA_MODE_MSG)"; \
+	echo "  exit 0"; \
 	echo "fi"; \
+	echo ""; \
+	echo "# Use gamma_g if it exists"; \
+	echo ""; \
+	echo "GAMMA_PROG=gamma"; \
+	echo "if [ -f gamma_g ]; then "; \
+	echo "  GAMMA_PROG=gamma_g"; \
+	echo "fi"; \
+	echo ""; \
+	echo "if [ \"$(OS_VENDOR)\" = \"Darwin\" ]; then "; \
+	echo "  # Ensure architecture for gamma and JAVA_HOME is the same."; \
+	echo "  # NOTE: gamma assumes the OpenJDK directory layout."; \
+	echo ""; \
+	echo "  GAMMA_ARCH=\"\`file \$${GAMMA_PROG} | awk '{print \$$NF}'\`\""; \
+	echo "  JVM_LIB=\"\$${JAVA_HOME}/jre/lib/libjava.$(LIBRARY_SUFFIX)\""; \
+	echo "  if [ ! -f \$${JVM_LIB} ]; then"; \
+	echo "    JVM_LIB=\"\$${JAVA_HOME}/jre/lib/$${LIBARCH}/libjava.$(LIBRARY_SUFFIX)\""; \
+	echo "  fi"; \
+	echo "  if [ ! -f \$${JVM_LIB} ] || [ -z \"\`file \$${JVM_LIB} | grep \$${GAMMA_ARCH}\`\" ]; then "; \
+	echo "    $(WRONG_DATA_MODE_MSG)"; \
+	echo "    exit 0"; \
+	echo "  fi"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Compile Queens program for test"; \
+	echo ""; \
 	echo "rm -f Queens.class"; \
 	echo "\$${JAVA_HOME}/bin/javac -d . $(GAMMADIR)/make/test/Queens.java"; \
-	echo '[ -f gamma_g ] && { gamma=gamma_g; }'; \
-	echo './$${gamma:-gamma} $(TESTFLAGS) Queens < /dev/null'; \
+	echo ""; \
+	echo "# Set library path solely for gamma launcher test run"; \
+	echo ""; \
+	echo "LD_LIBRARY_PATH=.:$${LD_LIBRARY_PATH:+$$LD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
+	echo "export LD_LIBRARY_PATH"; \
+	echo "unset LD_LIBRARY_PATH_32"; \
+	echo "unset LD_LIBRARY_PATH_64"; \
+	echo ""; \
+	echo "if [ \"$(OS_VENDOR)\" = \"Darwin\" ]; then "; \
+	echo "  DYLD_LIBRARY_PATH=.:$${DYLD_LIBRARY_PATH:+$$DYLD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/native_threads:\$${JAVA_HOME}/jre/lib:$${DYLD_LIBRARY_PATH:+$$DYLD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
+	echo "  export DYLD_LIBRARY_PATH"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Use the gamma launcher and JAVA_HOME to run the test"; \
+	echo ""; \
+	echo "./\$${GAMMA_PROG} $(TESTFLAGS) Queens < /dev/null"; \
 	) > $@
 	$(QUIETLY) chmod +x $@
 
--- a/make/linux/makefiles/gcc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/gcc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -23,19 +23,19 @@
 #
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
 # When cross-compiling the ALT_COMPILER_PATH points
 # to the cross-compilation toolset
 ifdef CROSS_COMPILE_ARCH
-CPP = $(ALT_COMPILER_PATH)/g++
+CXX = $(ALT_COMPILER_PATH)/g++
 CC  = $(ALT_COMPILER_PATH)/gcc
-HOSTCPP = g++
+HOSTCXX = g++
 HOSTCC  = gcc
 else
-CPP = g++
+CXX = g++
 CC  = gcc
-HOSTCPP = $(CPP)
+HOSTCXX = $(CXX)
 HOSTCC  = $(CC)
 endif
 
--- a/make/linux/makefiles/launcher.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/launcher.make	Mon Feb 27 15:06:36 2012 -0800
@@ -54,10 +54,10 @@
   LIBS_LAUNCHER             += -l$(JVM) $(LIBS)
 endif
 
-LINK_LAUNCHER = $(LINK.c)
+LINK_LAUNCHER = $(LINK.CC)
 
-LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CC/PRE_HOOK)
-LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CC/POST_HOOK)
+LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CXX/PRE_HOOK)
+LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CXX/POST_HOOK)
 
 LAUNCHER_OUT = launcher
 
@@ -73,11 +73,11 @@
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR_SHARE)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER): $(OBJS) $(LIBJVM) $(LAUNCHER_MAPFILE)
 	$(QUIETLY) echo Linking launcher...
--- a/make/linux/makefiles/ppc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/ppc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -28,3 +28,6 @@
 # Must also specify if CPU is big endian
 CFLAGS += -DVM_BIG_ENDIAN
 
+ifdef E500V2
+ASFLAGS += -Wa,-mspe -Wa,--defsym -Wa,E500V2=1 
+endif
--- a/make/linux/makefiles/product.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/product.make	Mon Feb 27 15:06:36 2012 -0800
@@ -52,4 +52,4 @@
 
 # If we can create .debuginfo files, then the VM is stripped in vm.make
 # and this macro is not used.
-# LINK_LIB.CC/POST_HOOK += $(STRIP_$(LINK_INTO))
+# LINK_LIB.CXX/POST_HOOK += $(STRIP_$(LINK_INTO))
--- a/make/linux/makefiles/rules.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/rules.make	Mon Feb 27 15:06:36 2012 -0800
@@ -27,52 +27,39 @@
 # Tell make that .cpp is important
 .SUFFIXES: .cpp $(SUFFIXES)
 
-# For now.  Other makefiles use CPP as the c++ compiler, but that should really
-# name the preprocessor.
-ifeq    ($(CCC),)
-CCC             = $(CPP)
-endif
-
 DEMANGLER       = c++filt
 DEMANGLE        = $(DEMANGLER) < $@ > .$@ && mv -f .$@ $@
 
-# $(CC) is the c compiler (cc/gcc), $(CCC) is the c++ compiler (CC/g++).
-C_COMPILE       = $(CC) $(CPPFLAGS) $(CFLAGS)
-CC_COMPILE      = $(CCC) $(CPPFLAGS) $(CFLAGS)
+# $(CC) is the C compiler (cc/gcc); $(CXX) is the C++ compiler (CC/g++).
+CC_COMPILE       = $(CC) $(CXXFLAGS) $(CFLAGS)
+CXX_COMPILE      = $(CXX) $(CXXFLAGS) $(CFLAGS)
 
 AS.S            = $(AS) $(ASFLAGS)
 
-COMPILE.c       = $(C_COMPILE) -c
-GENASM.c        = $(C_COMPILE) -S
-LINK.c          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_LIB.c      = $(CC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.c    = $(C_COMPILE) -E
+COMPILE.CC       = $(CC_COMPILE) -c
+GENASM.CC        = $(CC_COMPILE) -S
+LINK.CC          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_LIB.CC      = $(CC) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CC    = $(CC_COMPILE) -E
 
-COMPILE.CC      = $(CC_COMPILE) -c
-GENASM.CC       = $(CC_COMPILE) -S
-LINK.CC         = $(CCC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_NOPROF.CC  = $(CCC) $(LFLAGS) $(AOUT_FLAGS)
-LINK_LIB.CC     = $(CCC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.CC   = $(CC_COMPILE) -E
+COMPILE.CXX      = $(CXX_COMPILE) -c
+GENASM.CXX       = $(CXX_COMPILE) -S
+LINK.CXX         = $(CXX) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_NOPROF.CXX  = $(CXX) $(LFLAGS) $(AOUT_FLAGS)
+LINK_LIB.CXX     = $(CXX) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CXX   = $(CXX_COMPILE) -E
 
 # cross compiling the jvm with c2 requires host compilers to build
 # adlc tool
 
-HOST.CC_COMPILE      = $(HOSTCPP) $(CPPFLAGS) $(CFLAGS)
-HOST.COMPILE.CC      = $(HOST.CC_COMPILE) -c
-HOST.LINK_NOPROF.CC  = $(HOSTCPP) $(LFLAGS) $(AOUT_FLAGS)
+HOST.CXX_COMPILE      = $(HOSTCXX) $(CXXFLAGS) $(CFLAGS)
+HOST.COMPILE.CXX      = $(HOST.CXX_COMPILE) -c
+HOST.LINK_NOPROF.CXX  = $(HOSTCXX) $(LFLAGS) $(AOUT_FLAGS)
 
 
 # Effect of REMOVE_TARGET is to delete out-of-date files during "gnumake -k".
 REMOVE_TARGET   = rm -f $@
 
-# Synonyms.
-COMPILE.cpp     = $(COMPILE.CC)
-GENASM.cpp      = $(GENASM.CC)
-LINK.cpp        = $(LINK.CC)
-LINK_LIB.cpp    = $(LINK_LIB.CC)
-PREPROCESS.cpp  = $(PREPROCESS.CC)
-
 # Note use of ALT_BOOTDIR to explicitly specify location of java and
 # javac; this is the same environment variable used in the J2SE build
 # process for overriding the default spec, which is BOOTDIR.
@@ -161,14 +148,14 @@
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
 else
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
 	$(QUIETLY) $(if $(findstring $@, $(NONPIC_OBJ_FILES)), \
-	   $(subst $(VM_PICFLAG), ,$(COMPILE.CC)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
-	   $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
+	   $(subst $(VM_PICFLAG), ,$(COMPILE.CXX)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
+	   $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
 endif
 
 %.o: %.s
@@ -178,13 +165,13 @@
 
 %.s: %.cpp
 	@echo Generating assembly for $<
-	$(QUIETLY) $(GENASM.CC) -o $@ $<
+	$(QUIETLY) $(GENASM.CXX) -o $@ $<
 	$(QUIETLY) $(DEMANGLE) $(COMPILE_DONE)
 
 # Intermediate files (for debugging macros)
 %.i: %.cpp
 	@echo Preprocessing $< to $@
-	$(QUIETLY) $(PREPROCESS.CC) $< > $@ $(COMPILE_DONE)
+	$(QUIETLY) $(PREPROCESS.CXX) $< > $@ $(COMPILE_DONE)
 
 #  Override gnumake built-in rules which do sccs get operations badly.
 #  (They put the checked out code in the current directory, not in the
--- a/make/linux/makefiles/saproc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/saproc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -75,6 +75,7 @@
 	fi
 	@echo Making SA debugger back-end...
 	$(QUIETLY) $(CC) -D$(BUILDARCH) -D_GNU_SOURCE                   \
+		   -D_FILE_OFFSET_BITS=64                               \
                    $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG)     \
 	           -I$(SASRCDIR)                                        \
 	           -I$(GENERATED)                                       \
--- a/make/linux/makefiles/sparcWorks.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/sparcWorks.make	Mon Feb 27 15:06:36 2012 -0800
@@ -23,13 +23,13 @@
 #
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
-CPP = CC
+CXX = CC
 CC  = cc
 AS  = $(CC) -c
 
-HOSTCPP = $(CPP)
+HOSTCXX = $(CXX)
 HOSTCC  = $(CC)
 
 ARCHFLAG = $(ARCHFLAG/$(BUILDARCH))
--- a/make/linux/makefiles/top.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/top.make	Mon Feb 27 15:06:36 2012 -0800
@@ -115,8 +115,8 @@
 	@$(UpdatePCH)
 	@$(MAKE) -f vm.make $(MFLAGS-adjusted)
 
-install: the_vm
-	@$(MAKE) -f vm.make install
+install gamma: the_vm
+	@$(MAKE) -f vm.make $@
 
 # next rules support "make foo.[ois]"
 
--- a/make/linux/makefiles/vm.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/linux/makefiles/vm.make	Mon Feb 27 15:06:36 2012 -0800
@@ -88,16 +88,24 @@
 BUILD_USER    = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\""
 VM_DISTRO     = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\""
 
-CPPFLAGS =           \
+CXXFLAGS =           \
   ${SYSDEFS}         \
   ${INCLUDES}        \
   ${BUILD_VERSION}   \
   ${BUILD_TARGET}    \
   ${BUILD_USER}      \
   ${HS_LIB_ARCH}     \
-  ${JRE_VERSION}     \
   ${VM_DISTRO}
 
+# This is VERY important! The version define must only be supplied to vm_version.o.
+# Otherwise ccache cannot reuse its cache at all, since the version string might
+# contain a time and date.
+vm_version.o: CXXFLAGS += ${JRE_VERSION}
+
+ifndef JAVASE_EMBEDDED
+CFLAGS += -DINCLUDE_TRACE
+endif
+
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 CFLAGS += $(CFLAGS_WARN/BYFILE)
 
@@ -143,6 +151,12 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
 
+ifndef JAVASE_EMBEDDED
+SOURCE_PATHS+=$(shell if [ -d $(HS_ALT_SRC)/share/vm/jfr ]; then \
+  find $(HS_ALT_SRC)/share/vm/jfr -type d; \
+  fi)
+endif
+
 CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path))
 CORE_PATHS+=$(GENERATED)/jvmtifiles
 
@@ -270,13 +284,13 @@
   LIBS_VM   += $(LLVM_LIBS)
 endif
 
-LINK_VM = $(LINK_LIB.c)
+LINK_VM = $(LINK_LIB.CC)
 
 # rule for building precompiled header
 $(PRECOMPILED_HEADER):
 	$(QUIETLY) echo Generating precompiled header $@
 	$(QUIETLY) mkdir -p $(PRECOMPILED_HEADER_DIR)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -x c++-header $(PRECOMPILED_HEADER_SRC) -o $@ $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -x c++-header $(PRECOMPILED_HEADER_SRC) -o $@ $(COMPILE_DONE)
 
 # making the library:
 
@@ -306,10 +320,10 @@
 $(LIBJVM): $(LIBJVM.o) $(LIBJVM_MAPFILE) $(LD_SCRIPT)
 	$(QUIETLY) {                                                    \
 	    echo Linking vm...;                                         \
-	    $(LINK_LIB.CC/PRE_HOOK)                                     \
+	    $(LINK_LIB.CXX/PRE_HOOK)                                     \
 	    $(LINK_VM) $(LD_SCRIPT_FLAG)                                \
 		       $(LFLAGS_VM) -o $@ $(LIBJVM.o) $(LIBS_VM);       \
-	    $(LINK_LIB.CC/POST_HOOK)                                    \
+	    $(LINK_LIB.CXX/POST_HOOK)                                    \
 	    rm -f $@.1; ln -s $@ $@.1;                                  \
 	    [ -f $(LIBJVM_G) ] || { ln -s $@ $(LIBJVM_G); ln -s $@.1 $(LIBJVM_G).1; }; \
             if [ \"$(CROSS_COMPILE_ARCH)\" = \"\" ] ; then                    \
--- a/make/solaris/makefiles/adlc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/adlc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -40,9 +40,16 @@
 
 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 
 
-SOURCES.AD = \
+ifeq ("${Platform_arch_model}", "${Platform_arch}")
+  SOURCES.AD = \
   $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
   $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+else
+  SOURCES.AD = \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
+  $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
+endif
 
 EXEC	= $(OUTDIR)/adlc
 
@@ -55,10 +62,10 @@
 INCLUDES += $(Src_Dirs_I:%=-I%)
 
 # set flags for adlc compilation
-CPPFLAGS = $(SYSDEFS) $(INCLUDES)
+CXXFLAGS = $(SYSDEFS) $(INCLUDES)
 
 # Force assertions on.
-CPPFLAGS += -DASSERT
+CXXFLAGS += -DASSERT
 
 ifndef USE_GCC
   # We need libCstd.so for adlc 
@@ -123,7 +130,7 @@
 
 $(EXEC) : $(OBJECTS)
 	@echo Making adlc
-	$(QUIETLY) $(LINK_NOPROF.CC) -o $(EXEC) $(OBJECTS)
+	$(QUIETLY) $(LINK_NOPROF.CXX) -o $(EXEC) $(OBJECTS)
 
 # Random dependencies:
 $(OBJECTS): opcodes.hpp classes.hpp adlc.hpp adlcVMDeps.hpp adlparse.hpp archDesc.hpp arena.hpp dict2.hpp filebuff.hpp forms.hpp formsopt.hpp formssel.hpp
@@ -221,14 +228,14 @@
 $(OUTDIR)/%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # Some object files are given a prefix, to disambiguate
 # them from objects of the same name built for the VM.
 $(OUTDIR)/adlc-%.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) -o $@ $< $(COMPILE_DONE)
 
 # #########################################################################
 
--- a/make/solaris/makefiles/buildtree.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/buildtree.make	Mon Feb 27 15:06:36 2012 -0800
@@ -118,7 +118,7 @@
 BUILDTREE_MAKE	= $(GAMMADIR)/make/$(OS_FAMILY)/makefiles/buildtree.make
 
 BUILDTREE_TARGETS = Makefile flags.make flags_vm.make vm.make adlc.make jvmti.make sa.make \
-        env.ksh env.csh jdkpath.sh .dbxrc test_gamma
+        env.sh env.csh jdkpath.sh .dbxrc test_gamma
 
 BUILDTREE_VARS	= GAMMADIR=$(GAMMADIR) OS_FAMILY=$(OS_FAMILY) \
 	ARCH=$(ARCH) BUILDARCH=$(BUILDARCH) LIBARCH=$(LIBARCH) VARIANT=$(VARIANT)
@@ -313,22 +313,19 @@
 	echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(@F)"; \
 	) > $@
 
-env.ksh: $(BUILDTREE_MAKE)
+env.sh: $(BUILDTREE_MAKE)
 	@echo Creating $@ ...
 	$(QUIETLY) ( \
 	$(BUILDTREE_COMMENT); \
 	[ -n "$$JAVA_HOME" ] && { echo ": \$${JAVA_HOME:=$${JAVA_HOME}}"; }; \
 	{ \
-	echo "LD_LIBRARY_PATH=.:$${LD_LIBRARY_PATH:+$$LD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
-	echo "unset LD_LIBRARY_PATH_32"; \
-	echo "unset LD_LIBRARY_PATH_64"; \
 	echo "CLASSPATH=$${CLASSPATH:+$$CLASSPATH:}.:\$${JAVA_HOME}/jre/lib/rt.jar:\$${JAVA_HOME}/jre/lib/i18n.jar"; \
 	} | sed s:$${JAVA_HOME:--------}:\$${JAVA_HOME}:g; \
 	echo "HOTSPOT_BUILD_USER=\"$${LOGNAME:-$$USER} in `basename $(GAMMADIR)`\""; \
 	echo "export JAVA_HOME LD_LIBRARY_PATH CLASSPATH HOTSPOT_BUILD_USER"; \
 	) > $@
 
-env.csh: env.ksh
+env.csh: env.sh
 	@echo Creating $@ ...
 	$(QUIETLY) ( \
 	$(BUILDTREE_COMMENT); \
@@ -384,23 +381,86 @@
 JAVA_FLAG/64 = -d64
 
 WRONG_DATA_MODE_MSG = \
-	echo "JAVA_HOME must point to $(DATA_MODE)bit JDK."
+	echo "JAVA_HOME must point to a $(DATA_MODE)-bit OpenJDK."
+
+CROSS_COMPILING_MSG = \
+	echo "Cross compiling for ARCH $(CROSS_COMPILE_ARCH), skipping gamma run."
 
 test_gamma:  $(BUILDTREE_MAKE) $(GAMMADIR)/make/test/Queens.java
 	@echo Creating $@ ...
 	$(QUIETLY) ( \
-	echo '#!/bin/ksh'; \
+	echo "#!/bin/sh"; \
+	echo ""; \
 	$(BUILDTREE_COMMENT); \
-	echo '. ./env.ksh'; \
-	echo "if [ -z \$$JAVA_HOME ]; then { $(NO_JAVA_HOME_MSG); exit 0; }; fi"; \
-	echo "if ! \$${JAVA_HOME}/bin/java $(JAVA_FLAG) -fullversion 2>&1 > /dev/null"; \
-	echo "then"; \
-	echo "  $(WRONG_DATA_MODE_MSG); exit 0;"; \
+	echo ""; \
+	echo "# Include environment settings for gamma run"; \
+	echo ""; \
+	echo ". ./env.sh"; \
+	echo ""; \
+	echo "# Do not run gamma test for cross compiles"; \
+	echo ""; \
+	echo "if [ -n \"$(CROSS_COMPILE_ARCH)\" ]; then "; \
+	echo "  $(CROSS_COMPILING_MSG)"; \
+	echo "  exit 0"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Make sure JAVA_HOME is set as it is required for gamma"; \
+	echo ""; \
+	echo "if [ -z \"\$${JAVA_HOME}\" ]; then "; \
+	echo "  $(NO_JAVA_HOME_MSG)"; \
+	echo "  exit 0"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Check JAVA_HOME version to be used for the test"; \
+	echo ""; \
+	echo "\$${JAVA_HOME}/bin/java $(JAVA_FLAG) -fullversion > /dev/null 2>&1"; \
+	echo "if [ \$$? -ne 0 ]; then "; \
+	echo "  $(WRONG_DATA_MODE_MSG)"; \
+	echo "  exit 0"; \
 	echo "fi"; \
+	echo ""; \
+	echo "# Use gamma_g if it exists"; \
+	echo ""; \
+	echo "GAMMA_PROG=gamma"; \
+	echo "if [ -f gamma_g ]; then "; \
+	echo "  GAMMA_PROG=gamma_g"; \
+	echo "fi"; \
+	echo ""; \
+	echo "if [ \"$(OS_VENDOR)\" = \"Darwin\" ]; then "; \
+	echo "  # Ensure architecture for gamma and JAVA_HOME is the same."; \
+	echo "  # NOTE: gamma assumes the OpenJDK directory layout."; \
+	echo ""; \
+	echo "  GAMMA_ARCH=\"\`file \$${GAMMA_PROG} | awk '{print \$$NF}'\`\""; \
+	echo "  JVM_LIB=\"\$${JAVA_HOME}/jre/lib/libjava.$(LIBRARY_SUFFIX)\""; \
+	echo "  if [ ! -f \$${JVM_LIB} ]; then"; \
+	echo "    JVM_LIB=\"\$${JAVA_HOME}/jre/lib/$${LIBARCH}/libjava.$(LIBRARY_SUFFIX)\""; \
+	echo "  fi"; \
+	echo "  if [ ! -f \$${JVM_LIB} ] || [ -z \"\`file \$${JVM_LIB} | grep \$${GAMMA_ARCH}\`\" ]; then "; \
+	echo "    $(WRONG_DATA_MODE_MSG)"; \
+	echo "    exit 0"; \
+	echo "  fi"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Compile Queens program for test"; \
+	echo ""; \
 	echo "rm -f Queens.class"; \
 	echo "\$${JAVA_HOME}/bin/javac -d . $(GAMMADIR)/make/test/Queens.java"; \
-	echo '[ -f gamma_g ] && { gamma=gamma_g; }'; \
-	echo './$${gamma:-gamma} $(TESTFLAGS) Queens < /dev/null'; \
+	echo ""; \
+	echo "# Set library path solely for gamma launcher test run"; \
+	echo ""; \
+	echo "LD_LIBRARY_PATH=.:$${LD_LIBRARY_PATH:+$$LD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
+	echo "export LD_LIBRARY_PATH"; \
+	echo "unset LD_LIBRARY_PATH_32"; \
+	echo "unset LD_LIBRARY_PATH_64"; \
+	echo ""; \
+	echo "if [ \"$(OS_VENDOR)\" = \"Darwin\" ]; then "; \
+	echo "  DYLD_LIBRARY_PATH=.:$${DYLD_LIBRARY_PATH:+$$DYLD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/native_threads:\$${JAVA_HOME}/jre/lib:$${DYLD_LIBRARY_PATH:+$$DYLD_LIBRARY_PATH:}\$${JAVA_HOME}/jre/lib/${LIBARCH}/native_threads:\$${JAVA_HOME}/jre/lib/${LIBARCH}:${GCC_LIB}"; \
+	echo "  export DYLD_LIBRARY_PATH"; \
+	echo "fi"; \
+	echo ""; \
+	echo "# Use the gamma launcher and JAVA_HOME to run the test"; \
+	echo ""; \
+	echo "./\$${GAMMA_PROG} $(TESTFLAGS) Queens < /dev/null"; \
 	) > $@
 	$(QUIETLY) chmod +x $@
 
--- a/make/solaris/makefiles/dtrace.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/dtrace.make	Mon Feb 27 15:06:36 2012 -0800
@@ -150,11 +150,11 @@
 
 lib$(GENOFFS).so: $(DTRACE_SRCDIR)/$(GENOFFS).cpp $(DTRACE_SRCDIR)/$(GENOFFS).h \
                   $(LIBJVM.o)
-	$(QUIETLY) $(CCC) $(CPPFLAGS) $(GENOFFS_CFLAGS) $(SHARED_FLAG) $(PICFLAG) \
+	$(QUIETLY) $(CXX) $(CXXFLAGS) $(GENOFFS_CFLAGS) $(SHARED_FLAG) $(PICFLAG) \
 		 $(LFLAGS_GENOFFS) -o $@ $(DTRACE_SRCDIR)/$(GENOFFS).cpp -lc
 
 $(GENOFFS): $(DTRACE_SRCDIR)/$(GENOFFS)Main.c lib$(GENOFFS).so
-	$(QUIETLY) $(LINK.CC) -z nodefs -o $@ $(DTRACE_SRCDIR)/$(GENOFFS)Main.c \
+	$(QUIETLY) $(LINK.CXX) -z nodefs -o $@ $(DTRACE_SRCDIR)/$(GENOFFS)Main.c \
 		./lib$(GENOFFS).so
 
 CONDITIONALLY_UPDATE_JVMOFFS_TARGET = \
@@ -178,7 +178,7 @@
 	$(QUIETLY) $(CONDITIONALLY_UPDATE_JVMOFFS_TARGET)
 
 $(JVMOFFS.o): $(JVMOFFS).h $(JVMOFFS).cpp 
-	$(QUIETLY) $(CCC) -c -I. -o $@ $(ARCHFLAG) -D$(TYPE) $(JVMOFFS).cpp
+	$(QUIETLY) $(CXX) -c -I. -o $@ $(ARCHFLAG) -D$(TYPE) $(JVMOFFS).cpp
 
 $(LIBJVM_DB): $(DTRACE_SRCDIR)/$(JVM_DB).c $(JVMOFFS.o) $(XLIBJVM_DB) $(LIBJVM_DB_MAPFILE)
 	@echo Making $@
--- a/make/solaris/makefiles/gcc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/gcc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -23,9 +23,9 @@
 #
 
 #------------------------------------------------------------------------
-# CC, CPP & AS
+# CC, CXX & AS
 
-CPP = g++
+CXX = g++
 CC  = gcc
 AS  = $(CC) -c
 
@@ -36,12 +36,12 @@
 CC_VER_MAJOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
 CC_VER_MINOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f2)
 
-# Check for the versions of C++ and C compilers ($CPP and $CC) used. 
+# Check for the versions of C++ and C compilers ($CXX and $CC) used. 
 
 # Get the last thing on the line that looks like x.x+ (x is a digit).
 COMPILER_REV := \
-$(shell $(CPP) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
-C_COMPILER_REV := \
+$(shell $(CXX) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
+CC_COMPILER_REV := \
 $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f2)
 
 
--- a/make/solaris/makefiles/launcher.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/launcher.make	Mon Feb 27 15:06:36 2012 -0800
@@ -52,10 +52,10 @@
   LIBS_LAUNCHER             += -l$(JVM) $(LIBS)
 endif
 
-LINK_LAUNCHER = $(LINK.CC)
+LINK_LAUNCHER = $(LINK.CXX)
 
-LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CC/PRE_HOOK)
-LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CC/POST_HOOK)
+LINK_LAUNCHER/PRE_HOOK  = $(LINK_LIB.CXX/PRE_HOOK)
+LINK_LAUNCHER/POST_HOOK = $(LINK_LIB.CXX/POST_HOOK)
 
 ifeq ("${Platform_compiler}", "sparcWorks")
 # Enable the following LAUNCHERFLAGS addition if you need to compare the
@@ -86,11 +86,11 @@
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR_SHARE)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER_OUT)/%.o: $(LAUNCHERDIR)/%.c
 	$(QUIETLY) [ -d $(LAUNCHER_OUT) ] || { mkdir -p $(LAUNCHER_OUT); }
-	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CPPFLAGS)
+	$(QUIETLY) $(CC) -g -o $@ -c $< -MMD $(LAUNCHERFLAGS) $(CXXFLAGS)
 
 $(LAUNCHER): $(OBJS) $(LIBJVM) $(LAUNCHER_MAPFILE)
 ifeq ($(filter -sbfast -xsbfast, $(CFLAGS_BROWSE)),)
--- a/make/solaris/makefiles/product.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/product.make	Mon Feb 27 15:06:36 2012 -0800
@@ -70,7 +70,7 @@
 
 # If we can create .debuginfo files, then the VM is stripped in vm.make
 # and this macro is not used.
-# LINK_LIB.CC/POST_HOOK += $(STRIP_LIB.CC/POST_HOOK)
+# LINK_LIB.CXX/POST_HOOK += $(STRIP_LIB.CXX/POST_HOOK)
 
 G_SUFFIX =
 SYSDEFS += -DPRODUCT
--- a/make/solaris/makefiles/rules.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/rules.make	Mon Feb 27 15:06:36 2012 -0800
@@ -27,44 +27,31 @@
 # Tell make that .cpp is important
 .SUFFIXES: .cpp $(SUFFIXES)
 
-# For now.  Other makefiles use CPP as the c++ compiler, but that should really
-# name the preprocessor.
-ifeq    ($(CCC),)
-CCC             = $(CPP)
-endif
-
 DEMANGLER       = c++filt
 DEMANGLE        = $(DEMANGLER) < $@ > .$@ && mv -f .$@ $@
 
-# $(CC) is the c compiler (cc/gcc), $(CCC) is the c++ compiler (CC/g++).
-C_COMPILE       = $(CC) $(CPPFLAGS) $(CFLAGS)
-CC_COMPILE      = $(CCC) $(CPPFLAGS) $(CFLAGS)
+# $(CC) is the c compiler (cc/gcc), $(CXX) is the c++ compiler (CC/g++).
+CC_COMPILE       = $(CC) $(CXXFLAGS) $(CFLAGS)
+CXX_COMPILE      = $(CXX) $(CXXFLAGS) $(CFLAGS)
 
 AS.S            = $(AS) $(ASFLAGS)
 
-COMPILE.c       = $(C_COMPILE) -c
-GENASM.c        = $(C_COMPILE) -S
-LINK.c          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_LIB.c      = $(CC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.c    = $(C_COMPILE) -E
+COMPILE.CC       = $(CC_COMPILE) -c
+GENASM.CC        = $(CC_COMPILE) -S
+LINK.CC          = $(CC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_LIB.CC      = $(CC) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CC    = $(CC_COMPILE) -E
 
-COMPILE.CC      = $(CC_COMPILE) -c
-GENASM.CC       = $(CC_COMPILE) -S
-LINK.CC         = $(CCC) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
-LINK_NOPROF.CC  = $(CCC) $(LFLAGS) $(AOUT_FLAGS)
-LINK_LIB.CC     = $(CCC) $(LFLAGS) $(SHARED_FLAG)
-PREPROCESS.CC   = $(CC_COMPILE) -E
+COMPILE.CXX      = $(CXX_COMPILE) -c
+GENASM.CXX       = $(CXX_COMPILE) -S
+LINK.CXX         = $(CXX) $(LFLAGS) $(AOUT_FLAGS) $(PROF_AOUT_FLAGS)
+LINK_NOPROF.CXX  = $(CXX) $(LFLAGS) $(AOUT_FLAGS)
+LINK_LIB.CXX     = $(CXX) $(LFLAGS) $(SHARED_FLAG)
+PREPROCESS.CXX   = $(CXX_COMPILE) -E
 
 # Effect of REMOVE_TARGET is to delete out-of-date files during "gnumake -k".
 REMOVE_TARGET   = rm -f $@
 
-# Synonyms.
-COMPILE.cpp     = $(COMPILE.CC)
-GENASM.cpp      = $(GENASM.CC)
-LINK.cpp        = $(LINK.CC)
-LINK_LIB.cpp    = $(LINK_LIB.CC)
-PREPROCESS.cpp  = $(PREPROCESS.CC)
-
 # Note use of ALT_BOOTDIR to explicitly specify location of java and
 # javac; this is the same environment variable used in the J2SE build
 # process for overriding the default spec, which is BOOTDIR.
@@ -153,14 +140,14 @@
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
-	$(QUIETLY) $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
+	$(QUIETLY) $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE)
 else
 %.o: %.cpp
 	@echo Compiling $<
 	$(QUIETLY) $(REMOVE_TARGET)
 	$(QUIETLY) $(if $(findstring $@, $(NONPIC_OBJ_FILES)), \
-	   $(subst $(VM_PICFLAG), ,$(COMPILE.CC)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
-	   $(COMPILE.CC) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
+	   $(subst $(VM_PICFLAG), ,$(COMPILE.CXX)) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE), \
+	   $(COMPILE.CXX) $(DEPFLAGS) -o $@ $< $(COMPILE_DONE))
 endif
 
 %.o: %.s
@@ -170,13 +157,13 @@
 
 %.s: %.cpp
 	@echo Generating assembly for $<
-	$(QUIETLY) $(GENASM.CC) -o $@ $<
+	$(QUIETLY) $(GENASM.CXX) -o $@ $<
 	$(QUIETLY) $(DEMANGLE) $(COMPILE_DONE)
 
 # Intermediate files (for debugging macros)
 %.i: %.cpp
 	@echo Preprocessing $< to $@
-	$(QUIETLY) $(PREPROCESS.CC) $< > $@ $(COMPILE_DONE)
+	$(QUIETLY) $(PREPROCESS.CXX) $< > $@ $(COMPILE_DONE)
 
 #  Override gnumake built-in rules which do sccs get operations badly.
 #  (They put the checked out code in the current directory, not in the
--- a/make/solaris/makefiles/saproc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/saproc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -93,7 +93,7 @@
 	  exit 1; \
 	fi
 	@echo Making SA debugger back-end...
-	$(QUIETLY) $(CPP)                                               \
+	$(QUIETLY) $(CXX)                                               \
                    $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG)     \
 	           -I$(SASRCDIR)                                        \
 	           -I$(GENERATED)                                       \
--- a/make/solaris/makefiles/sparcWorks.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/sparcWorks.make	Mon Feb 27 15:06:36 2012 -0800
@@ -26,7 +26,7 @@
 
 # tell make which C and C++ compilers to use
 CC	= cc
-CPP	= CC
+CXX	= CC
 
 # Note that this 'as' is an older version of the Sun Studio 'fbe', and will
 #   use the older style options. The 'fbe' options will match 'cc' and 'CC'.
@@ -37,23 +37,23 @@
 
 REORDER_FLAG = -xF
 
-# Check for the versions of C++ and C compilers ($CPP and $CC) used. 
+# Check for the versions of C++ and C compilers ($CXX and $CC) used. 
 
 # Get the last thing on the line that looks like x.x+ (x is a digit).
 COMPILER_REV := \
-$(shell $(CPP) -V 2>&1 | sed -n 's/^.*[ ,\t]C++[ ,\t]\([1-9]\.[0-9][0-9]*\).*/\1/p')
-C_COMPILER_REV := \
+$(shell $(CXX) -V 2>&1 | sed -n 's/^.*[ ,\t]C++[ ,\t]\([1-9]\.[0-9][0-9]*\).*/\1/p')
+CC_COMPILER_REV := \
 $(shell $(CC) -V 2>&1 | sed -n 's/^.*[ ,\t]C[ ,\t]\([1-9]\.[0-9][0-9]*\).*/\1/p')
 
 # Pick which compiler is validated
 ifeq ($(JRE_RELEASE_VER),1.6.0)
   # Validated compiler for JDK6 is SS11 (5.8)
   VALIDATED_COMPILER_REVS   := 5.8
-  VALIDATED_C_COMPILER_REVS := 5.8
+  VALIDATED_CC_COMPILER_REVS := 5.8
 else
   # Validated compiler for JDK7 is SS12 update 1 + patches (5.10)
   VALIDATED_COMPILER_REVS   := 5.10
-  VALIDATED_C_COMPILER_REVS := 5.10
+  VALIDATED_CC_COMPILER_REVS := 5.10
 endif
 
 # Warning messages about not using the above validated versions
@@ -67,13 +67,13 @@
 	warning.)
 endif
 
-ENFORCE_C_COMPILER_REV${ENFORCE_C_COMPILER_REV} := $(strip ${VALIDATED_C_COMPILER_REVS})
-ifeq ($(filter ${ENFORCE_C_COMPILER_REV},${C_COMPILER_REV}),)
-PRINTABLE_C_REVS := $(subst $(shell echo ' '), or ,${ENFORCE_C_COMPILER_REV})
+ENFORCE_CC_COMPILER_REV${ENFORCE_CC_COMPILER_REV} := $(strip ${VALIDATED_CC_COMPILER_REVS})
+ifeq ($(filter ${ENFORCE_CC_COMPILER_REV},${CC_COMPILER_REV}),)
+PRINTABLE_C_REVS := $(subst $(shell echo ' '), or ,${ENFORCE_CC_COMPILER_REV})
 dummy_var_to_enforce_c_compiler_rev := $(shell \
-	echo >&2 WARNING: You are using cc version ${C_COMPILER_REV} and \
+	echo >&2 WARNING: You are using cc version ${CC_COMPILER_REV} and \
 	should be using version ${PRINTABLE_C_REVS}.; \
-	echo >&2 Set ENFORCE_C_COMPILER_REV=${C_COMPILER_REV} to avoid this \
+	echo >&2 Set ENFORCE_CC_COMPILER_REV=${CC_COMPILER_REV} to avoid this \
 	warning.)
 endif
 
@@ -98,7 +98,7 @@
                        } \
 	      END      { exit rc; }'
 
-LINK_LIB.CC/PRE_HOOK += $(JVM_CHECK_SYMBOLS) || exit 1;
+LINK_LIB.CXX/PRE_HOOK += $(JVM_CHECK_SYMBOLS) || exit 1;
 
 # New architecture options started in SS12 (5.9), we need both styles to build.
 #   The older arch options for SS11 (5.8) or older and also for /usr/ccs/bin/as.
@@ -518,7 +518,7 @@
 #FASTDEBUG_CFLAGS += -Qoption ccfe -xglobalstatic
 
 ifeq	(${COMPILER_REV_NUMERIC}, 502)
-COMPILER_DATE := $(shell $(CPP) -V 2>&1 | sed -n '/^.*[ ]C++[ ]\([1-9]\.[0-9][0-9]*\)/p' | awk '{ print $$NF; }')
+COMPILER_DATE := $(shell $(CXX) -V 2>&1 | sed -n '/^.*[ ]C++[ ]\([1-9]\.[0-9][0-9]*\)/p' | awk '{ print $$NF; }')
 ifeq	(${COMPILER_DATE}, 2001/01/31)
 # disable -g0 in fastdebug since SC6.1 dated 2001/01/31 seems to be buggy
 # use an innocuous value because it will get -g if it's empty
@@ -568,7 +568,7 @@
 # removing repeated lines.  The data can be extracted from
 # binaries in the field by using "mcs -p libjvm.so" or the older
 # command "what libjvm.so".
-LINK_LIB.CC/POST_HOOK += $(MCS) -c $@ || exit 1;
+LINK_LIB.CXX/POST_HOOK += $(MCS) -c $@ || exit 1;
 # (The exit 1 is necessary to cause a build failure if the command fails and
 # multiple commands are strung together, and the final semicolon is necessary
 # since the hook must terminate itself as a valid command.)
@@ -576,7 +576,7 @@
 # Also, strip debug and line number information (worth about 1.7Mb).
 # If we can create .debuginfo files, then the VM is stripped in vm.make
 # and this macro is not used.
-STRIP_LIB.CC/POST_HOOK = $(STRIP) -x $@ || exit 1;
-# STRIP_LIB.CC/POST_HOOK is incorporated into LINK_LIB.CC/POST_HOOK
+STRIP_LIB.CXX/POST_HOOK = $(STRIP) -x $@ || exit 1;
+# STRIP_LIB.CXX/POST_HOOK is incorporated into LINK_LIB.CXX/POST_HOOK
 # in certain configurations, such as product.make.  Other configurations,
 # such as debug.make, do not include the strip operation.
--- a/make/solaris/makefiles/top.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/top.make	Mon Feb 27 15:06:36 2012 -0800
@@ -107,8 +107,8 @@
 the_vm: vm_build_preliminaries $(adjust-mflags)
 	@$(MAKE) -f vm.make $(MFLAGS-adjusted)
 
-install: the_vm
-	@$(MAKE) -f vm.make install
+install gamma: the_vm
+	@$(MAKE) -f vm.make $@
 
 # next rules support "make foo.[oi]"
 
--- a/make/solaris/makefiles/vm.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/solaris/makefiles/vm.make	Mon Feb 27 15:06:36 2012 -0800
@@ -76,16 +76,20 @@
 BUILD_USER    = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\""
 VM_DISTRO     = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\""
 
-CPPFLAGS =           \
+CXXFLAGS =           \
   ${SYSDEFS}         \
   ${INCLUDES}        \
   ${BUILD_VERSION}   \
   ${BUILD_TARGET}    \
   ${BUILD_USER}      \
   ${HS_LIB_ARCH}     \
-  ${JRE_VERSION}     \
   ${VM_DISTRO}
 
+# This is VERY important! The version define must only be supplied to vm_version.o
+# If not, ccache will not re-use the cache at all, since the version string might contain
+# a time and date. 
+vm_version.o: CXXFLAGS += ${JRE_VERSION} 
+
 # CFLAGS_WARN holds compiler options to suppress/enable warnings.
 CFLAGS += $(CFLAGS_WARN)
 
@@ -93,7 +97,7 @@
 CFLAGS += $(CFLAGS/NOEX)
 
 # Extra flags from gnumake's invocation or environment
-CFLAGS += $(EXTRA_CFLAGS)
+CFLAGS += $(EXTRA_CFLAGS) -DINCLUDE_TRACE
 
 # Math Library (libm.so), do not use -lm.
 #    There might be two versions of libm.so on the build system:
@@ -160,6 +164,10 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
 
+SOURCE_PATHS+=$(shell if [ -d $(HS_ALT_SRC)/share/vm/jfr ]; then \
+  find $(HS_ALT_SRC)/share/vm/jfr -type d; \
+  fi)
+
 CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path))
 CORE_PATHS+=$(GENERATED)/jvmtifiles
 
@@ -261,17 +269,17 @@
 endif
 
 ifdef USE_GCC
-LINK_VM = $(LINK_LIB.c)
+LINK_VM = $(LINK_LIB.CC)
 else
-LINK_VM = $(LINK_LIB.CC)
+LINK_VM = $(LINK_LIB.CXX)
 endif
 # making the library:
 $(LIBJVM): $(LIBJVM.o) $(LIBJVM_MAPFILE) 
 ifeq ($(filter -sbfast -xsbfast, $(CFLAGS_BROWSE)),)
 	@echo Linking vm...
-	$(QUIETLY) $(LINK_LIB.CC/PRE_HOOK)
+	$(QUIETLY) $(LINK_LIB.CXX/PRE_HOOK)
 	$(QUIETLY) $(LINK_VM) $(LFLAGS_VM) -o $@ $(LIBJVM.o) $(LIBS_VM)
-	$(QUIETLY) $(LINK_LIB.CC/POST_HOOK)
+	$(QUIETLY) $(LINK_LIB.CXX/POST_HOOK)
 	$(QUIETLY) rm -f $@.1 && ln -s $@ $@.1
 	$(QUIETLY) [ -f $(LIBJVM_G) ] || ln -s $@ $(LIBJVM_G)
 	$(QUIETLY) [ -f $(LIBJVM_G).1 ] || ln -s $@.1 $(LIBJVM_G).1
--- a/make/windows/build.bat	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/build.bat	Mon Feb 27 15:06:36 2012 -0800
@@ -35,6 +35,8 @@
 if %errorlevel% == 0 goto isia64
 cl 2>&1 | grep "AMD64" >NUL
 if %errorlevel% == 0 goto amd64
+cl 2>&1 | grep "x64" >NUL
+if %errorlevel% == 0 goto amd64
 set ARCH=x86
 set BUILDARCH=i486
 set Platform_arch=x86
--- a/make/windows/build_vm_def.sh	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/build_vm_def.sh	Mon Feb 27 15:06:36 2012 -0800
@@ -58,10 +58,10 @@
 
 # When called from IDE the first param should contain the link version, otherwise may be nill
 if [ "x$1" != "x" ]; then
-LINK_VER="$1"
+LD_VER="$1"
 fi
 
-if [ "x$LINK_VER" != "x800" -a  "x$LINK_VER" != "x900" -a "x$LINK_VER" != "x1000" ]; then
+if [ "x$LD_VER" != "x800" -a  "x$LD_VER" != "x900" -a "x$LD_VER" != "x1000" ]; then
 $DUMPBIN /symbols *.obj | "$GREP" "??_7.*@@6B@" | "$GREP" -v "type_info" | "$AWK" '{print $7}' | "$SORT" | "$UNIQ" > vm2.def
 else
 # Can't use pipes when calling cl.exe or link.exe from IDE. Using transit file vm3.def
--- a/make/windows/create_obj_files.sh	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/create_obj_files.sh	Mon Feb 27 15:06:36 2012 -0800
@@ -73,6 +73,13 @@
 
 BASE_PATHS="${BASE_PATHS} ${GENERATED}/jvmtifiles"
 
+if [ -d "${ALTSRC}/share/vm/jfr" ]; then
+  BASE_PATHS="${BASE_PATHS} ${ALTSRC}/share/vm/jfr/agent"
+  BASE_PATHS="${BASE_PATHS} ${ALTSRC}/share/vm/jfr/agent/isolated_deps/util"
+  BASE_PATHS="${BASE_PATHS} ${ALTSRC}/share/vm/jfr/jvm"
+  BASE_PATHS="${BASE_PATHS} ${ALTSRC}/share/vm/jfr"
+fi
+
 CORE_PATHS="${BASE_PATHS}"
 # shared is already in BASE_PATHS. Should add vm/memory but that one is also in BASE_PATHS.
 if [ -d "${ALTSRC}/share/vm/gc_implementation" ]; then
--- a/make/windows/get_msc_ver.sh	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/get_msc_ver.sh	Mon Feb 27 15:06:36 2012 -0800
@@ -72,8 +72,8 @@
   echo "MSC_VER_RAW=$MSC_VER_RAW"
 fi
 
-if [ "x$FORCE_LINK_VER" != "x" ]; then
-  echo "LINK_VER=$FORCE_LINK_VER"
+if [ "x$FORCE_LD_VER" != "x" ]; then
+  echo "LD_VER=$FORCE_LD_VER"
 else
   # use the "link" command that is co-located with the "cl" command
   cl_cmd=`which cl`
@@ -83,11 +83,11 @@
     # which can't find "cl" so just use which ever "link" we find
     link_cmd="link"
   fi
-  LINK_VER_RAW=`"$link_cmd" 2>&1 | "$HEAD" -n 1 | "$SED" 's/.*Version[\ ]*\([0-9][0-9.]*\).*/\1/'`
-  LINK_VER_MAJOR=`"$ECHO" $LINK_VER_RAW | "$CUT" -d'.' -f1`
-  LINK_VER_MINOR=`"$ECHO" $LINK_VER_RAW | "$CUT" -d'.' -f2`
-  LINK_VER_MICRO=`"$ECHO" $LINK_VER_RAW | "$CUT" -d'.' -f3`
-  LINK_VER=`"$EXPR" $LINK_VER_MAJOR \* 100 + $LINK_VER_MINOR`
-  echo "LINK_VER=$LINK_VER"
-  echo "LINK_VER_RAW=$LINK_VER_RAW"
+  LD_VER_RAW=`"$link_cmd" 2>&1 | "$HEAD" -n 1 | "$SED" 's/.*Version[\ ]*\([0-9][0-9.]*\).*/\1/'`
+  LD_VER_MAJOR=`"$ECHO" $LD_VER_RAW | "$CUT" -d'.' -f1`
+  LD_VER_MINOR=`"$ECHO" $LD_VER_RAW | "$CUT" -d'.' -f2`
+  LD_VER_MICRO=`"$ECHO" $LD_VER_RAW | "$CUT" -d'.' -f3`
+  LD_VER=`"$EXPR" $LD_VER_MAJOR \* 100 + $LD_VER_MINOR`
+  echo "LD_VER=$LD_VER"
+  echo "LD_VER_RAW=$LD_VER_RAW"
 fi
--- a/make/windows/makefiles/adlc.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/adlc.make	Mon Feb 27 15:06:36 2012 -0800
@@ -45,14 +45,25 @@
 ADLCFLAGS=-q -T -U_LP64
 !endif
 
-ADLC_CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_WARNINGS /D _CRT_SECURE_NO_DEPRECATE
+ADLC_CXX_FLAGS=$(CXX_FLAGS) /D _CRT_SECURE_NO_WARNINGS /D _CRT_SECURE_NO_DEPRECATE
 
-CPP_INCLUDE_DIRS=\
+CXX_INCLUDE_DIRS=\
   /I "..\generated" \
   /I "$(WorkSpace)\src\share\vm" \
   /I "$(WorkSpace)\src\os\windows\vm" \
   /I "$(WorkSpace)\src\cpu\$(Platform_arch)\vm"
 
+!if "$(Platform_arch_model)" == "$(Platform_arch)"
+SOURCES_AD=\
+  $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \
+  $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
+!else
+SOURCES_AD=\
+  $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \
+  $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad \
+  $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
+!endif
+
 # NOTE! If you add any files here, you must also update GENERATED_NAMES_IN_DIR
 # and ProjectCreatorIDEOptions in projectcreator.make. 
 GENERATED_NAMES=\
@@ -83,14 +94,14 @@
   $(AdlcOutDir)\dfa_$(Platform_arch_model).cpp
 
 {$(WorkSpace)\src\share\vm\adlc}.cpp.obj::
-        $(CPP) $(ADLC_CPP_FLAGS) $(EXH_FLAGS) $(CPP_INCLUDE_DIRS) /c $<
+        $(CXX) $(ADLC_CXX_FLAGS) $(EXH_FLAGS) $(CXX_INCLUDE_DIRS) /c $<
 
 {$(WorkSpace)\src\share\vm\opto}.cpp.obj::
-        $(CPP) $(ADLC_CPP_FLAGS) $(EXH_FLAGS) $(CPP_INCLUDE_DIRS) /c $<
+        $(CXX) $(ADLC_CXX_FLAGS) $(EXH_FLAGS) $(CXX_INCLUDE_DIRS) /c $<
 
 adlc.exe: main.obj adlparse.obj archDesc.obj arena.obj dfa.obj dict2.obj filebuff.obj \
           forms.obj formsopt.obj formssel.obj opcodes.obj output_c.obj output_h.obj
-	$(LINK) $(LINK_FLAGS) /subsystem:console /out:$@ $**
+	$(LD) $(LD_FLAGS) /subsystem:console /out:$@ $**
 !if "$(MT)" != ""
 # The previous link command created a .manifest file that we want to
 # insert into the linked artifact so we do not need to track it
@@ -105,7 +116,6 @@
 	$(ADLC) $(ADLCFLAGS) $(Platform_arch_model).ad
 	mv $(GENERATED_NAMES) $(AdlcOutDir)/
 
-$(Platform_arch_model).ad: $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
+$(Platform_arch_model).ad: $(SOURCES_AD)
 	rm -f $(Platform_arch_model).ad
-	cat $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad  \
-	    $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad >$(Platform_arch_model).ad
+	cat $(SOURCES_AD) >$(Platform_arch_model).ad
--- a/make/windows/makefiles/compile.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/compile.make	Mon Feb 27 15:06:36 2012 -0800
@@ -23,9 +23,9 @@
 #
 
 # Generic compiler settings
-CPP=cl.exe
+CXX=cl.exe
 
-# CPP Flags: (these vary slightly from VC6->VS2003->VS2005 compilers)
+# CXX Flags: (these vary slightly from VC6->VS2003->VS2005 compilers)
 #   /nologo   Supress copyright message at every cl.exe startup
 #   /W3       Warning level 3
 #   /Zi       Include debugging information
@@ -50,47 +50,47 @@
 # improving the quality of crash log stack traces involving jvm.dll.
 
 # These are always used in all compiles
-CPP_FLAGS=/nologo /W3 /WX
+CXX_FLAGS=/nologo /W3 /WX
 
 # Let's add debug information always too.
-CPP_FLAGS=$(CPP_FLAGS) /Zi
+CXX_FLAGS=$(CXX_FLAGS) /Zi
 
 # Based on BUILDARCH we add some flags and select the default compiler name
 !if "$(BUILDARCH)" == "ia64"
 MACHINE=IA64
 DEFAULT_COMPILER_NAME=VS2003
-CPP_FLAGS=$(CPP_FLAGS) /D "CC_INTERP" /D "_LP64" /D "IA64"
+CXX_FLAGS=$(CXX_FLAGS) /D "CC_INTERP" /D "_LP64" /D "IA64"
 !endif
 
 !if "$(BUILDARCH)" == "amd64"
 MACHINE=AMD64
 DEFAULT_COMPILER_NAME=VS2005
-CPP_FLAGS=$(CPP_FLAGS) /D "_LP64" /D "AMD64"
+CXX_FLAGS=$(CXX_FLAGS) /D "_LP64" /D "AMD64"
 LP64=1
 !endif
 
 !if "$(BUILDARCH)" == "i486"
 MACHINE=I386
 DEFAULT_COMPILER_NAME=VS2003
-CPP_FLAGS=$(CPP_FLAGS) /D "IA32"
+CXX_FLAGS=$(CXX_FLAGS) /D "IA32"
 !endif
 
 # Sanity check, this is the default if not amd64, ia64, or i486
 !ifndef DEFAULT_COMPILER_NAME
-CPP=ARCH_ERROR
+CXX=ARCH_ERROR
 !endif
 
-CPP_FLAGS=$(CPP_FLAGS) /D "WIN32" /D "_WINDOWS"
+CXX_FLAGS=$(CXX_FLAGS) /D "WIN32" /D "_WINDOWS"
 # Must specify this for sharedRuntimeTrig.cpp
-CPP_FLAGS=$(CPP_FLAGS) /D "VM_LITTLE_ENDIAN"
+CXX_FLAGS=$(CXX_FLAGS) /D "VM_LITTLE_ENDIAN"
 
 # Used for platform dispatching
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_OS_FAMILY_windows
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_ARCH_$(Platform_arch)
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_ARCH_MODEL_$(Platform_arch_model)
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_OS_ARCH_windows_$(Platform_arch)
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_OS_ARCH_MODEL_windows_$(Platform_arch_model)
-CPP_FLAGS=$(CPP_FLAGS) /D TARGET_COMPILER_visCPP
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_OS_FAMILY_windows
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_ARCH_$(Platform_arch)
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_ARCH_MODEL_$(Platform_arch_model)
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_OS_ARCH_windows_$(Platform_arch)
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_OS_ARCH_MODEL_windows_$(Platform_arch_model)
+CXX_FLAGS=$(CXX_FLAGS) /D TARGET_COMPILER_visCPP
 
 
 # MSC_VER is a 4 digit number that tells us what compiler is being used
@@ -150,14 +150,14 @@
 # Always add the _STATIC_CPPLIB flag
 STATIC_CPPLIB_OPTION = /D _STATIC_CPPLIB /D _DISABLE_DEPRECATE_STATIC_CPPLIB
 MS_RUNTIME_OPTION = $(MS_RUNTIME_OPTION) $(STATIC_CPPLIB_OPTION)
-CPP_FLAGS=$(CPP_FLAGS) $(MS_RUNTIME_OPTION)
+CXX_FLAGS=$(CXX_FLAGS) $(MS_RUNTIME_OPTION)
 
 # How /GX option is spelled
 GX_OPTION = /GX
 
 # Optimization settings for various versions of the compilers and types of
 #    builds. Three basic sets of settings: product, fastdebug, and debug.
-#    These get added into CPP_FLAGS as needed by other makefiles.
+#    These get added into CXX_FLAGS as needed by other makefiles.
 !if "$(COMPILER_NAME)" == "VC6"
 PRODUCT_OPT_OPTION   = /Ox /Os /Gy /GF
 FASTDEBUG_OPT_OPTION = /Ox /Os /Gy /GF
@@ -180,7 +180,7 @@
 #    externals at link time. Even with /GS-, you need bufferoverflowU.lib.
 #    NOTE: Currently we decided to not use /GS-
 BUFFEROVERFLOWLIB = bufferoverflowU.lib
-LINK_FLAGS = /manifest $(LINK_FLAGS) $(BUFFEROVERFLOWLIB)
+LD_FLAGS = /manifest $(LD_FLAGS) $(BUFFEROVERFLOWLIB)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
@@ -191,7 +191,7 @@
 FASTDEBUG_OPT_OPTION = /O2 /Oy-
 DEBUG_OPT_OPTION     = /Od
 GX_OPTION = /EHsc
-LINK_FLAGS = /manifest $(LINK_FLAGS)
+LD_FLAGS = /manifest $(LD_FLAGS)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
@@ -202,12 +202,12 @@
 FASTDEBUG_OPT_OPTION = /O2 /Oy-
 DEBUG_OPT_OPTION     = /Od
 GX_OPTION = /EHsc
-LINK_FLAGS = /manifest $(LINK_FLAGS)
+LD_FLAGS = /manifest $(LD_FLAGS)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
 !if "$(BUILDARCH)" == "i486"
-LINK_FLAGS = /SAFESEH $(LINK_FLAGS)
+LD_FLAGS = /SAFESEH $(LD_FLAGS)
 !endif
 !endif
 
@@ -225,15 +225,15 @@
 !endif
 
 # Generic linker settings
-LINK=link.exe
-LINK_FLAGS= $(LINK_FLAGS) kernel32.lib user32.lib gdi32.lib winspool.lib \
+LD=link.exe
+LD_FLAGS= $(LD_FLAGS) kernel32.lib user32.lib gdi32.lib winspool.lib \
  comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib \
  uuid.lib Wsock32.lib winmm.lib /nologo /machine:$(MACHINE) /opt:REF \
  /opt:ICF,8 /map /debug
 
 
 !if $(MSC_VER) >= 1600 
-LINK_FLAGS= $(LINK_FLAGS) psapi.lib
+LD_FLAGS= $(LD_FLAGS) psapi.lib
 !endif
 
 # Resource compiler settings
@@ -250,7 +250,7 @@
 	 /D "HS_INTERNAL_NAME=$(HS_INTERNAL_NAME)" \
 	 /D "HS_NAME=$(HS_NAME)"
 
-# Need this to match the CPP_FLAGS settings
+# Need this to match the CXX_FLAGS settings
 !if "$(MFC_DEBUG)" == "true"
 RC_FLAGS = $(RC_FLAGS) /D "_DEBUG"
 !endif
--- a/make/windows/makefiles/debug.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/debug.make	Mon Feb 27 15:06:36 2012 -0800
@@ -38,7 +38,7 @@
 !include ../local.make
 !include compile.make
 
-CPP_FLAGS=$(CPP_FLAGS) $(DEBUG_OPT_OPTION)
+CXX_FLAGS=$(CXX_FLAGS) $(DEBUG_OPT_OPTION)
 
 !include $(WorkSpace)/make/windows/makefiles/vm.make
 !include local.make
@@ -52,8 +52,8 @@
 	sh $(WorkSpace)/make/windows/build_vm_def.sh
 
 $(AOUT): $(Res_Files) $(Obj_Files) vm.def
-	$(LINK) @<<
-  $(LINK_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
+	$(LD) @<<
+  $(LD_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
 <<
 !if "$(MT)" != ""
 # The previous link command created a .manifest file that we want to
--- a/make/windows/makefiles/fastdebug.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/fastdebug.make	Mon Feb 27 15:06:36 2012 -0800
@@ -38,7 +38,7 @@
 !include ../local.make
 !include compile.make
 
-CPP_FLAGS=$(CPP_FLAGS) $(FASTDEBUG_OPT_OPTION)
+CXX_FLAGS=$(CXX_FLAGS) $(FASTDEBUG_OPT_OPTION)
 
 !include $(WorkSpace)/make/windows/makefiles/vm.make
 !include local.make
@@ -52,8 +52,8 @@
 	sh $(WorkSpace)/make/windows/build_vm_def.sh
 
 $(AOUT): $(Res_Files) $(Obj_Files) vm.def
-	$(LINK) @<<
-  $(LINK_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
+	$(LD) @<<
+  $(LD_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
 <<
 !if "$(MT)" != ""
 # The previous link command created a .manifest file that we want to
--- a/make/windows/makefiles/launcher.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/launcher.make	Mon Feb 27 15:06:36 2012 -0800
@@ -23,7 +23,7 @@
 #
 
 
-LAUNCHER_FLAGS=$(CPP_FLAGS) $(ARCHFLAG) \
+LAUNCHER_FLAGS=$(CXX_FLAGS) $(ARCHFLAG) \
 	/D FULL_VERSION=\"$(HOTSPOT_RELEASE_VERSION)\" \
 	/D JDK_MAJOR_VERSION=\"$(JDK_MAJOR_VERSION)\" \
 	/D JDK_MINOR_VERSION=\"$(JDK_MINOR_VERSION)\" \
@@ -39,18 +39,18 @@
 	/I $(WorkSpace)\src\cpu\$(Platform_arch)\vm \
 	/I $(WorkSpace)\src\os\windows\vm
 
-LINK_FLAGS=/manifest $(HS_INTERNAL_NAME).lib kernel32.lib user32.lib /nologo /machine:$(MACHINE) /map /debug /subsystem:console 
+LD_FLAGS=/manifest $(HS_INTERNAL_NAME).lib kernel32.lib user32.lib /nologo /machine:$(MACHINE) /map /debug /subsystem:console 
 
 !if "$(COMPILER_NAME)" == "VS2005"
 # This VS2005 compiler has /GS as a default and requires bufferoverflowU.lib
 #    on the link command line, otherwise we get missing __security_check_cookie
 #    externals at link time. Even with /GS-, you need bufferoverflowU.lib.
 BUFFEROVERFLOWLIB = bufferoverflowU.lib
-LINK_FLAGS = $(LINK_FLAGS) $(BUFFEROVERFLOWLIB)
+LD_FLAGS = $(LD_FLAGS) $(BUFFEROVERFLOWLIB)
 !endif
 
 !if "$(COMPILER_NAME)" == "VS2010" && "$(BUILDARCH)" == "i486"
-LINK_FLAGS = /SAFESEH $(LINK_FLAGS)
+LD_FLAGS = /SAFESEH $(LD_FLAGS)
 !endif
 
 LAUNCHERDIR = $(WorkSpace)/src/os/windows/launcher
@@ -60,14 +60,14 @@
 
 {$(LAUNCHERDIR)}.c{$(OUTDIR)}.obj:
 	-mkdir $(OUTDIR) 2>NUL >NUL
-        $(CPP) $(LAUNCHER_FLAGS) /c /Fo$@ $<
+        $(CXX) $(LAUNCHER_FLAGS) /c /Fo$@ $<
 
 {$(LAUNCHERDIR_SHARE)}.c{$(OUTDIR)}.obj:
 	-mkdir $(OUTDIR) 2>NUL >NUL
-        $(CPP) $(LAUNCHER_FLAGS) /c /Fo$@ $<
+        $(CXX) $(LAUNCHER_FLAGS) /c /Fo$@ $<
 
 $(OUTDIR)\*.obj: $(LAUNCHERDIR)\*.c $(LAUNCHERDIR)\*.h $(LAUNCHERDIR_SHARE)\*.c $(LAUNCHERDIR_SHARE)\*.h
 
 launcher: $(OUTDIR)\java.obj $(OUTDIR)\java_md.obj $(OUTDIR)\jli_util.obj
 	echo $(JAVA_HOME) > jdkpath.txt  
-	$(LINK) $(LINK_FLAGS) /out:hotspot.exe $**
+	$(LD) $(LD_FLAGS) /out:hotspot.exe $**
--- a/make/windows/makefiles/product.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/product.make	Mon Feb 27 15:06:36 2012 -0800
@@ -37,7 +37,7 @@
 !include ../local.make
 !include compile.make
 
-CPP_FLAGS=$(CPP_FLAGS) $(PRODUCT_OPT_OPTION)
+CXX_FLAGS=$(CXX_FLAGS) $(PRODUCT_OPT_OPTION)
 
 RELEASE=
 
@@ -54,16 +54,16 @@
 # Kernel doesn't need exported vtbl symbols.
 !if "$(Variant)" == "kernel"
 $(AOUT): $(Res_Files) $(Obj_Files)
-	$(LINK) @<<
-  $(LINK_FLAGS) /out:$@ /implib:$*.lib $(Obj_Files) $(Res_Files)
+	$(LD) @<<
+  $(LD_FLAGS) /out:$@ /implib:$*.lib $(Obj_Files) $(Res_Files)
 <<
 !else
 vm.def: $(Obj_Files)
 	sh $(WorkSpace)/make/windows/build_vm_def.sh
 
 $(AOUT): $(Res_Files) $(Obj_Files) vm.def
-	$(LINK) @<<
-  $(LINK_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
+	$(LD) @<<
+  $(LD_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files)
 <<
 !endif
 !if "$(MT)" != ""
--- a/make/windows/makefiles/projectcreator.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/projectcreator.make	Mon Feb 27 15:06:36 2012 -0800
@@ -58,7 +58,8 @@
         -absoluteInclude $(HOTSPOTBUILDSPACE)/%f/generated \
         -ignorePath $(HOTSPOTBUILDSPACE)/%f/generated \
         -ignorePath src\share\vm\adlc \
-        -ignorePath src\share\vm\shark
+        -ignorePath src\share\vm\shark \
+        -ignorePath posix
 
 # This is referenced externally by both the IDE and batch builds
 ProjectCreatorOptions=
@@ -88,7 +89,7 @@
         -jdkTargetRoot $(HOTSPOTJDKDIST) \
         -define ALIGN_STACK_FRAMES \
         -define VM_LITTLE_ENDIAN \
-        -prelink  "" "Generating vm.def..." "cd %o	set HOTSPOTMKSHOME=$(HOTSPOTMKSHOME)	$(HOTSPOTMKSHOME)\sh $(HOTSPOTWORKSPACE)\make\windows\build_vm_def.sh $(LINK_VER)" \
+        -prelink  "" "Generating vm.def..." "cd %o	set HOTSPOTMKSHOME=$(HOTSPOTMKSHOME)	set JAVA_HOME=$(HOTSPOTJDKDIST)	$(HOTSPOTMKSHOME)\sh $(HOTSPOTWORKSPACE)\make\windows\build_vm_def.sh $(LD_VER)" \
         -postbuild "" "Building hotspot.exe..." "cd %o	set HOTSPOTMKSHOME=$(HOTSPOTMKSHOME)	nmake -f $(HOTSPOTWORKSPACE)\make\windows\projectfiles\common\Makefile LOCAL_MAKE=$(HOTSPOTBUILDSPACE)\%f\local.make JAVA_HOME=$(HOTSPOTJDKDIST) launcher" \
         -ignoreFile jsig.c \
         -ignoreFile jvmtiEnvRecommended.cpp \
@@ -207,6 +208,7 @@
 ##################################################
 ProjectCreatorIDEOptions=$(ProjectCreatorIDEOptions) \
  -define_compiler1 COMPILER1 \
+ -ignorePath_compiler1 src/share/vm/graal \
 $(ProjectCreatorIDEOptionsIgnoreCompiler2:TARGET=compiler1)
 
 ##################################################
@@ -223,6 +225,7 @@
 #NOTE! This list must be kept in sync with GENERATED_NAMES in adlc.make.
 ProjectCreatorIDEOptions=$(ProjectCreatorIDEOptions) \
  -define_compiler2 COMPILER2 \
+ -ignorePath_compiler2 src/share/vm/graal \
  -additionalFile_compiler2 $(Platform_arch_model).ad \
  -additionalGeneratedFile_compiler2 $(HOTSPOTBUILDSPACE)/%f/generated/adfiles ad_$(Platform_arch_model).cpp \
  -additionalGeneratedFile_compiler2 $(HOTSPOTBUILDSPACE)/%f/generated/adfiles ad_$(Platform_arch_model).hpp \
--- a/make/windows/makefiles/sa.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/sa.make	Mon Feb 27 15:06:36 2012 -0800
@@ -91,16 +91,16 @@
 !if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line, 
 # otherwise we get missing __security_check_cookie externals at link time. 
-SA_LINK_FLAGS = bufferoverflowU.lib
+SA_LD_FLAGS = bufferoverflowU.lib
 !endif
 !else
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /ZI /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
 !endif
 !if "$(MT)" != ""
-SA_LINK_FLAGS = /manifest $(SA_LINK_FLAGS)
+SA_LD_FLAGS = /manifest $(SA_LD_FLAGS)
 !endif
 SASRCFILE = $(AGENT_DIR)/src/os/win32/windbg/sawindbg.cpp
-SA_LFLAGS = $(SA_LINK_FLAGS) /nologo /subsystem:console /map /debug /machine:$(MACHINE)
+SA_LFLAGS = $(SA_LD_FLAGS) /nologo /subsystem:console /map /debug /machine:$(MACHINE)
 
 # Note that we do not keep sawindbj.obj around as it would then
 # get included in the dumpbin command in build_vm_def.sh
@@ -110,14 +110,14 @@
 # Use ";#2" for .dll and ";#1" for .exe in the MT command below:
 $(SAWINDBG): $(SASRCFILE)
 	set INCLUDE=$(SA_INCLUDE)$(INCLUDE)
-	$(CPP) @<<
+	$(CXX) @<<
 	  /I"$(BootStrapDir)/include" /I"$(BootStrapDir)/include/win32" 
 	  /I"$(GENERATED)" $(SA_CFLAGS)
 	  $(SASRCFILE)
 	  /out:sawindbg.obj
 <<
 	set LIB=$(SA_LIB)$(LIB)
-	$(LINK) /out:$@ /DLL sawindbg.obj dbgeng.lib $(SA_LFLAGS)
+	$(LD) /out:$@ /DLL sawindbg.obj dbgeng.lib $(SA_LFLAGS)
 !if "$(MT)" != ""
 	$(MT) /manifest $(@F).manifest /outputresource:$(@F);#2
 !endif
--- a/make/windows/makefiles/sanity.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/sanity.make	Mon Feb 27 15:06:36 2012 -0800
@@ -31,5 +31,5 @@
 	echo *** WARNING *** unrecognized cl.exe version $(MSC_VER) ($(RAW_MSC_VER)).  Use FORCE_MSC_VER to override automatic detection.
 
 checkLink:
-	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" if "$(LINK_VER)" NEQ "900" if "$(LINK_VER)" NEQ "1000" \
-	echo *** WARNING *** unrecognized link.exe version $(LINK_VER) ($(RAW_LINK_VER)).  Use FORCE_LINK_VER to override automatic detection.
+	@ if "$(LD_VER)" NEQ "710" if "$(LD_VER)" NEQ "800" if "$(LD_VER)" NEQ "900" if "$(LD_VER)" NEQ "1000" \
+	echo *** WARNING *** unrecognized link.exe version $(LD_VER) ($(RAW_LD_VER)).  Use FORCE_LD_VER to override automatic detection.
--- a/make/windows/makefiles/shared.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/shared.make	Mon Feb 27 15:06:36 2012 -0800
@@ -30,8 +30,8 @@
 DIR=.
 !endif
 
-!ifndef CPP
-CPP=cl.exe
+!ifndef CXX
+CXX=cl.exe
 !endif
 
 
--- a/make/windows/makefiles/vm.make	Mon Feb 27 14:50:58 2012 -0800
+++ b/make/windows/makefiles/vm.make	Mon Feb 27 15:06:36 2012 -0800
@@ -19,7 +19,7 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-#  
+#
 #
 
 # Resource file containing VERSIONINFO
@@ -30,14 +30,14 @@
 COMMONSRC=$(WorkSpace)\src
 ALTSRC=$(WorkSpace)\src\closed
 
-!ifdef RELEASE 
+!ifdef RELEASE
 !ifdef DEVELOP
-CPP_FLAGS=$(CPP_FLAGS) /D "DEBUG"
+CXX_FLAGS=$(CXX_FLAGS) /D "DEBUG"
 !else
-CPP_FLAGS=$(CPP_FLAGS) /D "PRODUCT"
+CXX_FLAGS=$(CXX_FLAGS) /D "PRODUCT"
 !endif
 !else
-CPP_FLAGS=$(CPP_FLAGS) /D "ASSERT"
+CXX_FLAGS=$(CXX_FLAGS) /D "ASSERT"
 !endif
 
 !if "$(Variant)" == "core"
@@ -45,19 +45,19 @@
 !endif
 
 !if "$(Variant)" == "kernel"
-CPP_FLAGS=$(CPP_FLAGS) /D "KERNEL"
+CXX_FLAGS=$(CXX_FLAGS) /D "KERNEL"
 !endif
 
 !if "$(Variant)" == "compiler1"
-CPP_FLAGS=$(CPP_FLAGS) /D "COMPILER1"
+CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1"
 !endif
 
 !if "$(Variant)" == "compiler2"
-CPP_FLAGS=$(CPP_FLAGS) /D "COMPILER2"
+CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER2"
 !endif
 
 !if "$(Variant)" == "tiered"
-CPP_FLAGS=$(CPP_FLAGS) /D "COMPILER1" /D "COMPILER2"
+CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1" /D "COMPILER2"
 !endif
 
 !if "$(Variant)" == "graal"
@@ -71,17 +71,21 @@
 !endif
 
 # The following variables are defined in the generated local.make file.
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_RELEASE_VERSION=\"$(HS_BUILD_VER)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "JRE_RELEASE_VERSION=\"$(JRE_RELEASE_VER)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_LIB_ARCH=\"$(HOTSPOT_LIB_ARCH)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_BUILD_TARGET=\"$(BUILD_FLAVOR)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_BUILD_USER=\"$(BuildUser)\""
-CPP_FLAGS=$(CPP_FLAGS) /D "HOTSPOT_VM_DISTRO=\"$(HOTSPOT_VM_DISTRO)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_RELEASE_VERSION=\"$(HS_BUILD_VER)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "JRE_RELEASE_VERSION=\"$(JRE_RELEASE_VER)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_LIB_ARCH=\"$(HOTSPOT_LIB_ARCH)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_BUILD_TARGET=\"$(BUILD_FLAVOR)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_BUILD_USER=\"$(BuildUser)\""
+CXX_FLAGS=$(CXX_FLAGS) /D "HOTSPOT_VM_DISTRO=\"$(HOTSPOT_VM_DISTRO)\""
 
-CPP_FLAGS=$(CPP_FLAGS) $(CPP_INCLUDE_DIRS)
+!ifndef JAVASE_EMBEDDED
+CXX_FLAGS=$(CXX_FLAGS) /D "INCLUDE_TRACE"
+!endif
+
+CXX_FLAGS=$(CXX_FLAGS) $(CXX_INCLUDE_DIRS)
 
 # Define that so jni.h is on correct side
-CPP_FLAGS=$(CPP_FLAGS) /D "_JNI_IMPLEMENTATION_"
+CXX_FLAGS=$(CXX_FLAGS) /D "_JNI_IMPLEMENTATION_"
 
 !if "$(BUILDARCH)" == "ia64"
 STACK_SIZE="/STACK:1048576,262144"
@@ -101,8 +105,8 @@
 !endif
 
 # If you modify exports below please do the corresponding changes in
-# src/share/tools/ProjectCreator/WinGammaPlatformVC7.java 
-LINK_FLAGS=$(LINK_FLAGS) $(STACK_SIZE) /subsystem:windows /dll /base:0x8000000 \
+# src/share/tools/ProjectCreator/WinGammaPlatformVC7.java
+LD_FLAGS=$(LD_FLAGS) $(STACK_SIZE) /subsystem:windows /dll /base:0x8000000 \
   /export:JNI_GetDefaultJavaVMInitArgs       \
   /export:JNI_CreateJavaVM                   \
   /export:JVM_FindClassFromBootLoader        \
@@ -118,25 +122,25 @@
   /export:JVM_GetThreadStateValues           \
   /export:JVM_InitAgentProperties
 
-CPP_INCLUDE_DIRS=/I "..\generated"
+CXX_INCLUDE_DIRS=/I "..\generated"
 
 !if exists($(ALTSRC)\share\vm)
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) /I "$(ALTSRC)\share\vm"
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) /I "$(ALTSRC)\share\vm"
 !endif
 
 !if exists($(ALTSRC)\os\windows\vm)
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) /I "$(ALTSRC)\os\windows\vm"
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) /I "$(ALTSRC)\os\windows\vm"
 !endif
 
 !if exists($(ALTSRC)\os_cpu\windows_$(Platform_arch)\vm)
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) /I "$(ALTSRC)\os_cpu\windows_$(Platform_arch)\vm"
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) /I "$(ALTSRC)\os_cpu\windows_$(Platform_arch)\vm"
 !endif
 
 !if exists($(ALTSRC)\cpu\$(Platform_arch)\vm)
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) /I "$(ALTSRC)\cpu\$(Platform_arch)\vm"
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) /I "$(ALTSRC)\cpu\$(Platform_arch)\vm"
 !endif
 
-CPP_INCLUDE_DIRS=$(CPP_INCLUDE_DIRS) \
+CXX_INCLUDE_DIRS=$(CXX_INCLUDE_DIRS) \
   /I "$(COMMONSRC)\share\vm" \
   /I "$(COMMONSRC)\share\vm\precompiled" \
   /I "$(COMMONSRC)\share\vm\prims" \
@@ -144,12 +148,12 @@
   /I "$(COMMONSRC)\os_cpu\windows_$(Platform_arch)\vm" \
   /I "$(COMMONSRC)\cpu\$(Platform_arch)\vm"
 
-CPP_DONT_USE_PCH=/D DONT_USE_PRECOMPILED_HEADER
+CXX_DONT_USE_PCH=/D DONT_USE_PRECOMPILED_HEADER
 
 !if "$(USE_PRECOMPILED_HEADER)" != "0"
-CPP_USE_PCH=/Fp"vm.pch" /Yu"precompiled.hpp"
+CXX_USE_PCH=/Fp"vm.pch" /Yu"precompiled.hpp"
 !else
-CPP_USE_PCH=$(CPP_DONT_USE_PCH)
+CXX_USE_PCH=$(CXX_DONT_USE_PCH)
 !endif
 
 # Where to find the source code for the virtual machine (is this used?)
@@ -175,6 +179,7 @@
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/prims
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/runtime
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/services
+VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/trace
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/utilities
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/libadt
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/os/windows/vm
@@ -182,103 +187,113 @@
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/cpu/$(Platform_arch)/vm
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/opto
 
+!if exists($(ALTSRC)\share\vm\jfr)
+VM_PATH=$(VM_PATH);$(ALTSRC)/share/vm/jfr/agent
+VM_PATH=$(VM_PATH);$(ALTSRC)/share/vm/jfr/agent/isolated_deps/util
+VM_PATH=$(VM_PATH);$(ALTSRC)/share/vm/jfr/jvm
+VM_PATH=$(VM_PATH);$(ALTSRC)/share/vm/jfr
+!endif
+
 VM_PATH={$(VM_PATH)}
 
 # Special case files not using precompiled header files.
 
 c1_RInfo_$(Platform_arch).obj: $(WorkSpace)\src\cpu\$(Platform_arch)\vm\c1_RInfo_$(Platform_arch).cpp 
-	 $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\cpu\$(Platform_arch)\vm\c1_RInfo_$(Platform_arch).cpp
+	 $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\cpu\$(Platform_arch)\vm\c1_RInfo_$(Platform_arch).cpp
 
 os_windows.obj: $(WorkSpace)\src\os\windows\vm\os_windows.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\os_windows.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\os_windows.cpp
 
 os_windows_$(Platform_arch).obj: $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\os_windows_$(Platform_arch).cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\os_windows_$(Platform_arch).cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\os_windows_$(Platform_arch).cpp
 
 osThread_windows.obj: $(WorkSpace)\src\os\windows\vm\osThread_windows.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\osThread_windows.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\osThread_windows.cpp
 
 conditionVar_windows.obj: $(WorkSpace)\src\os\windows\vm\conditionVar_windows.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\conditionVar_windows.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os\windows\vm\conditionVar_windows.cpp
 
 getThread_windows_$(Platform_arch).obj: $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\getThread_windows_$(Platform_arch).cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\getThread_windows_$(Platform_arch).cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\os_cpu\windows_$(Platform_arch)\vm\getThread_windows_$(Platform_arch).cpp
 
 opcodes.obj: $(WorkSpace)\src\share\vm\opto\opcodes.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\share\vm\opto\opcodes.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\share\vm\opto\opcodes.cpp
 
 bytecodeInterpreter.obj: $(WorkSpace)\src\share\vm\interpreter\bytecodeInterpreter.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c $(WorkSpace)\src\share\vm\interpreter\bytecodeInterpreter.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c $(WorkSpace)\src\share\vm\interpreter\bytecodeInterpreter.cpp
 
 bytecodeInterpreterWithChecks.obj: ..\generated\jvmtifiles\bytecodeInterpreterWithChecks.cpp
-        $(CPP) $(CPP_FLAGS) $(CPP_DONT_USE_PCH) /c ..\generated\jvmtifiles\bytecodeInterpreterWithChecks.cpp
+        $(CXX) $(CXX_FLAGS) $(CXX_DONT_USE_PCH) /c ..\generated\jvmtifiles\bytecodeInterpreterWithChecks.cpp
 
 # Default rules for the Virtual Machine
 {$(COMMONSRC)\share\vm\c1}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\compiler}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\code}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\interpreter}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\ci}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\classfile}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\parallelScavenge}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\shared}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\parNew}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_implementation\g1}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\gc_interface}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\asm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\memory}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\oops}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\prims}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\runtime}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\services}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
+{$(COMMONSRC)\share\vm\trace}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\utilities}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\libadt}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\share\vm\opto}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\os\windows\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 # This guy should remain a single colon rule because
 # otherwise we can't specify the output filename.
@@ -286,98 +301,113 @@
         @$(RC) $(RC_FLAGS) /fo"$@" $<
 
 {$(COMMONSRC)\cpu\$(Platform_arch)\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(COMMONSRC)\os_cpu\windows_$(Platform_arch)\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\c1}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\compiler}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\code}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\interpreter}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\ci}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\classfile}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\parallelScavenge}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\shared}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\parNew}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_implementation\g1}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\gc_interface}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\asm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\memory}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\oops}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\prims}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\runtime}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\services}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
+{$(ALTSRC)\share\vm\trace}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\utilities}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\libadt}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\share\vm\opto}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\os\windows\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 # otherwise we can't specify the output filename.
 {$(ALTSRC)\os\windows\vm}.rc.res:
         @$(RC) $(RC_FLAGS) /fo"$@" $<
 
 {$(ALTSRC)\cpu\$(Platform_arch)\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {$(ALTSRC)\os_cpu\windows_$(Platform_arch)\vm}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {..\generated\incls}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {..\generated\adfiles}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 {..\generated\jvmtifiles}.cpp.obj::
-        $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
+{$(ALTSRC)\share\vm\jfr}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
+{$(ALTSRC)\share\vm\jfr\agent}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
+{$(ALTSRC)\share\vm\jfr\agent\isolated_deps\util}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
+{$(ALTSRC)\share\vm\jfr\jvm}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
 default::
 
 _build_pch_file.obj:
         @echo #include "precompiled.hpp" > ../generated/_build_pch_file.cpp
-        $(CPP) $(CPP_FLAGS) /Fp"vm.pch" /Yc"precompiled.hpp" /c ../generated/_build_pch_file.cpp
+        $(CXX) $(CXX_FLAGS) /Fp"vm.pch" /Yc"precompiled.hpp" /c ../generated/_build_pch_file.cpp
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -3036,10 +3036,8 @@
                                                    Label* L_failure,
                                                    Label* L_slow_path,
                                         RegisterOrConstant super_check_offset) {
-  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_super_cache_offset_in_bytes());
-  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                    Klass::super_check_offset_offset_in_bytes());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
 
   bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
   bool need_slow_path = (must_load_sco ||
@@ -3159,10 +3157,8 @@
   assert(label_nulls <= 1, "at most one NULL in the batch");
 
   // a couple of useful fields in sub_klass:
-  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_supers_offset_in_bytes());
-  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_super_cache_offset_in_bytes());
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 
   // Do a linear scan of the secondary super-klass chain.
   // This code is rarely used, so simplicity is a virtue here.
@@ -3336,7 +3332,7 @@
   cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);
 
   load_klass(obj_reg, temp_reg);
-  ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
   or3(G2_thread, temp_reg, temp_reg);
   xor3(mark_reg, temp_reg, temp_reg);
   andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
@@ -3413,7 +3409,7 @@
   // FIXME: due to a lack of registers we currently blow away the age
   // bits in this situation. Should attempt to preserve them.
   load_klass(obj_reg, temp_reg);
-  ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
   or3(G2_thread, temp_reg, temp_reg);
   casn(mark_addr.base(), mark_reg, temp_reg);
   // If the biasing toward our thread failed, this means that
@@ -3443,7 +3439,7 @@
   // FIXME: due to a lack of registers we currently blow away the age
   // bits in this situation. Should attempt to preserve them.
   load_klass(obj_reg, temp_reg);
-  ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
+  ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg);
   casn(mark_addr.base(), mark_reg, temp_reg);
   // Fall through to the normal CAS-based lock, because no matter what
   // the result of the above CAS, some thread must have succeeded in
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2134,6 +2134,7 @@
   // address pseudos: make these names unlike instruction names to avoid confusion
   inline intptr_t load_pc_address( Register reg, int bytes_to_skip );
   inline void load_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
+  inline void load_bool_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
   inline void load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset = 0);
   inline void store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
   inline void store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0);
@@ -2249,7 +2250,7 @@
   // this platform we assume byte size
 
   inline void stbool(Register d, const Address& a) { stb(d, a); }
-  inline void ldbool(const Address& a, Register d) { ldsb(a, d); }
+  inline void ldbool(const Address& a, Register d) { ldub(a, d); }
   inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); }
 
   // klass oop manipulations if compressed
--- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -692,6 +692,17 @@
 }
 
 
+inline void MacroAssembler::load_bool_contents(const AddressLiteral& addrlit, Register d, int offset) {
+  assert_not_delayed();
+  if (ForceUnreachable) {
+    patchable_sethi(addrlit, d);
+  } else {
+    sethi(addrlit, d);
+  }
+  ldub(d, addrlit.low10() + offset, d);
+}
+
+
 inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) {
   assert_not_delayed();
   if (ForceUnreachable) {
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -302,7 +302,7 @@
     assert(_obj != noreg, "must be a valid register");
     assert(_oop_index >= 0, "must have oop index");
     __ load_heap_oop(_obj, java_lang_Class::klass_offset_in_bytes(), G3);
-    __ ld_ptr(G3, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc), G3);
+    __ ld_ptr(G3, in_bytes(instanceKlass::init_thread_offset()), G3);
     __ cmp_and_brx_short(G2_thread, G3, Assembler::notEqual, Assembler::pn, call_patch);
 
     // load_klass patches may execute the patched code before it's
@@ -471,8 +471,8 @@
 
   __ load_klass(src_reg, tmp_reg);
 
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
-  __ ld(ref_type_adr, tmp_reg);
+  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
+  __ ldub(ref_type_adr, tmp_reg);
 
   // _reference_type field is of type ReferenceType (enum)
   assert(REF_NONE == 0, "check this code");
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -391,7 +391,7 @@
   __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type);
   __ delayed()->nop();
   __ should_not_reach_here();
-  assert(code_offset() - offset <= exception_handler_size, "overflow");
+  guarantee(code_offset() - offset <= exception_handler_size, "overflow");
   __ end_a_stub();
 
   return offset;
@@ -474,8 +474,7 @@
   AddressLiteral deopt_blob(SharedRuntime::deopt_blob()->unpack());
   __ JUMP(deopt_blob, G3_scratch, 0); // sethi;jmp
   __ delayed()->nop();
-  assert(code_offset() - offset <= deopt_handler_size, "overflow");
-  debug_only(__ stop("should have gone to the caller");)
+  guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
   __ end_a_stub();
 
   return offset;
@@ -2202,8 +2201,7 @@
           } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
             __ load_klass(dst, tmp);
           }
-          int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-            Klass::layout_helper_offset_in_bytes();
+          int lh_offset = in_bytes(Klass::layout_helper_offset());
 
           __ lduw(tmp, lh_offset, tmp2);
 
@@ -2238,12 +2236,10 @@
         __ mov(length, len);
         __ load_klass(dst, tmp);
 
-        int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
-                         objArrayKlass::element_klass_offset_in_bytes());
+        int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
         __ ld_ptr(tmp, ek_offset, super_k);
 
-        int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                          Klass::super_check_offset_offset_in_bytes());
+        int sco_offset = in_bytes(Klass::super_check_offset_offset());
         __ lduw(super_k, sco_offset, chk_off);
 
         __ call_VM_leaf(tmp, copyfunc_addr);
@@ -2455,8 +2451,8 @@
          op->obj()->as_register()   == O0 &&
          op->klass()->as_register() == G5, "must be");
   if (op->init_check()) {
-    __ ld(op->klass()->as_register(),
-          instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc),
+    __ ldub(op->klass()->as_register(),
+          in_bytes(instanceKlass::init_state_offset()),
           op->tmp1()->as_register());
     add_debug_info_for_null_check_here(op->stub()->info());
     __ cmp(op->tmp1()->as_register(), instanceKlass::fully_initialized);
@@ -2627,7 +2623,7 @@
   } else {
     bool need_slow_path = true;
     if (k->is_loaded()) {
-      if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+      if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset()))
         need_slow_path = false;
       // perform the fast part of the checking logic
       __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
@@ -2731,7 +2727,7 @@
     __ load_klass(value, klass_RInfo);
 
     // get instance klass
-    __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)), k_RInfo);
+    __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset()), k_RInfo);
     // perform the fast part of the checking logic
     __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, failure_target, NULL);
 
@@ -3235,6 +3231,26 @@
   // no-op on TSO
 }
 
+void LIR_Assembler::membar_loadload() {
+  // no-op
+  //__ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad));
+}
+
+void LIR_Assembler::membar_storestore() {
+  // no-op
+  //__ membar(Assembler::Membar_mask_bits(Assembler::StoreStore));
+}
+
+void LIR_Assembler::membar_loadstore() {
+  // no-op
+  //__ membar(Assembler::Membar_mask_bits(Assembler::LoadStore));
+}
+
+void LIR_Assembler::membar_storeload() {
+  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+}
+
+
 // Pack two sequential registers containing 32 bit values
 // into a single 64 bit register.
 // src and src->successor() are packed into dst
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -69,7 +69,7 @@
 #else
          call_stub_size = 20,
 #endif // _LP64
-         exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(10*4),
-         deopt_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(10*4) };
+         exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(128),
+         deopt_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(64)  };
 
 #endif // CPU_SPARC_VM_C1_LIRASSEMBLER_SPARC_HPP
--- a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -181,7 +181,7 @@
 void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) {
   assert_different_registers(obj, klass, len, t1, t2);
   if (UseBiasedLocking && !len->is_valid()) {
-    ld_ptr(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes(), t1);
+    ld_ptr(klass, in_bytes(Klass::prototype_header_offset()), t1);
   } else {
     set((intx)markOopDesc::prototype(), t1);
   }
@@ -252,7 +252,7 @@
 #ifdef ASSERT
   {
     Label ok;
-    ld(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), t1);
+    ld(klass, in_bytes(Klass::layout_helper_offset()), t1);
     if (var_size_in_bytes != noreg) {
       cmp_and_brx_short(t1, var_size_in_bytes, Assembler::equal, Assembler::pt, ok);
     } else {
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -398,14 +398,14 @@
 
           if (id == fast_new_instance_init_check_id) {
             // make sure the klass is initialized
-            __ ld(G5_klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_t1);
+            __ ldub(G5_klass, in_bytes(instanceKlass::init_state_offset()), G3_t1);
             __ cmp_and_br_short(G3_t1, instanceKlass::fully_initialized, Assembler::notEqual, Assembler::pn, slow_path);
           }
 #ifdef ASSERT
           // assert object can be fast path allocated
           {
             Label ok, not_ok;
-          __ ld(G5_klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), G1_obj_size);
+          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
           // make sure it's an instance (LH > 0)
           __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok);
           __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size);
@@ -425,7 +425,7 @@
           __ bind(retry_tlab);
 
           // get the instance size
-          __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
+          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
 
           __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
 
@@ -437,7 +437,7 @@
 
           __ bind(try_eden);
           // get the instance size
-          __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
+          __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size);
           __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
           __ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2);
 
@@ -471,8 +471,7 @@
         Register G4_length = G4; // Incoming
         Register O0_obj   = O0; // Outgoing
 
-        Address klass_lh(G5_klass, ((klassOopDesc::header_size() * HeapWordSize)
-                                    + Klass::layout_helper_offset_in_bytes()));
+        Address klass_lh(G5_klass, Klass::layout_helper_offset());
         assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
         assert(Klass::_lh_header_size_mask == 0xFF, "bytewise");
         // Use this offset to pick out an individual byte of the layout_helper:
@@ -592,7 +591,7 @@
         Label register_finalizer;
         Register t = O1;
         __ load_klass(O0, t);
-        __ ld(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), t);
+        __ ld(t, in_bytes(Klass::access_flags_offset()), t);
         __ set(JVM_ACC_HAS_FINALIZER, G3);
         __ andcc(G3, t, G0);
         __ br(Assembler::notZero, false, Assembler::pt, register_finalizer);
--- a/src/cpu/sparc/vm/c2_globals_sparc.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/c2_globals_sparc.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,7 +42,7 @@
 #else
 define_pd_global(bool, ProfileInterpreter,           true);
 #endif // CC_INTERP
-define_pd_global(bool, TieredCompilation,            true);
+define_pd_global(bool, TieredCompilation,            trueInTiered);
 define_pd_global(intx, CompileThreshold,             10000);
 define_pd_global(intx, BackEdgeThreshold,            140000);
 
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -766,7 +766,7 @@
       // get native function entry point(O0 is a good temp until the very end)
        ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc::native_function_offset())), O0);
     // for static methods insert the mirror argument
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 
     __ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc:: constants_offset())), O1);
     __ ld_ptr(Address(O1, 0, constantPoolOopDesc::pool_holder_offset_in_bytes()), O1);
@@ -1173,7 +1173,7 @@
     __ btst(JVM_ACC_SYNCHRONIZED, O1);
     __ br( Assembler::zero, false, Assembler::pt, done);
 
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ delayed()->btst(JVM_ACC_STATIC, O1);
     __ ld_ptr(XXX_STATE(_locals), O1);
     __ br( Assembler::zero, true, Assembler::pt, got_obj);
--- a/src/cpu/sparc/vm/frame_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/frame_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #include "oops/markOop.hpp"
 #include "oops/methodOop.hpp"
 #include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
@@ -810,7 +811,7 @@
 }
 
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 #define DESCRIBE_FP_OFFSET(name) \
   values.describe(frame_no, fp() + frame::name##_offset, #name)
@@ -820,11 +821,19 @@
     values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1);
   }
 
-  if (is_interpreted_frame()) {
+  if (is_ricochet_frame()) {
+    MethodHandles::RicochetFrame::describe(this, values, frame_no);
+  } else if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_padding);
     DESCRIBE_FP_OFFSET(interpreter_frame_oop_temp);
+
+    // esp, according to Lesp (i.e. not depending on bci), if it seems valid
+    intptr_t* esp = *interpreter_frame_esp_addr();
+    if ((esp >= sp()) && (esp < fp())) {
+      values.describe(-1, esp, "*Lesp");
+    }
   }
 
   if (!is_compiled_frame()) {
@@ -844,4 +853,3 @@
   // unused... but returns fp() to minimize changes introduced by 7087445
   return fp();
 }
-
--- a/src/cpu/sparc/vm/frame_sparc.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/frame_sparc.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -82,6 +82,8 @@
 
 inline intptr_t*    frame::sender_sp() const  { return fp(); }
 
+inline intptr_t* frame::real_fp() const { return fp(); }
+
 // Used only in frame::oopmapreg_to_location
 // This return a value in VMRegImpl::slot_size
 inline int frame::pd_oop_map_offset_adjustment() const {
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -56,14 +56,15 @@
 // Stack slots are 2X larger in LP64 than in the 32 bit VM.
 define_pd_global(intx, ThreadStackSize,       1024);
 define_pd_global(intx, VMThreadStackSize,     1024);
+define_pd_global(intx, StackShadowPages, 10 DEBUG_ONLY(+1));
 #else
 define_pd_global(intx, ThreadStackSize,       512);
 define_pd_global(intx, VMThreadStackSize,     512);
+define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1));
 #endif
 
 define_pd_global(intx, StackYellowPages, 2);
 define_pd_global(intx, StackRedPages, 1);
-define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1));
 
 define_pd_global(intx, PreInflateSpin,       40);  // Determined by running design center
 
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -177,7 +177,7 @@
   BLOCK_COMMENT("ricochet_blob.bounce");
 
   if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-  trace_method_handle(_masm, "ricochet_blob.bounce");
+  trace_method_handle(_masm, "return/ricochet_blob.bounce");
 
   __ JMP(L1_continuation, 0);
   __ delayed()->nop();
@@ -268,14 +268,16 @@
 }
 
 // Emit code to verify that FP is pointing at a valid ricochet frame.
-#ifdef ASSERT
+#ifndef PRODUCT
 enum {
   ARG_LIMIT = 255, SLOP = 45,
   // use this parameter for checking for garbage stack movements:
   UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
   // the slop defends against false alarms due to fencepost errors
 };
+#endif
 
+#ifdef ASSERT
 void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
   // The stack should look like this:
   //    ... keep1 | dest=42 | keep2 | magic | handler | magic | recursive args | [RF]
@@ -874,7 +876,14 @@
   }
 #endif
   __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break);
->>
+  // work top down to bottom, copying contiguous data upwards
+  // In pseudo-code:
+  //   while (--top >= bottom) *(top + distance) = *(top + 0);
+  RegisterOrConstant offset = __ argument_offset(positive_distance_in_slots, positive_distance_in_slots.register_or_noreg());
+  __ BIND(L_loop);
+  __ sub(top_reg, wordSize, top_reg);
+  __ ld_ptr(           Address(top_reg, 0     ), temp2_reg);
+  __ st_ptr(temp2_reg, Address(top_reg, offset)           );
   __ cmp_and_brx_short(top_reg, bottom_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop);
   assert(Interpreter::stackElementSize == wordSize, "else change loop");
   __ BIND(L_break);
@@ -994,31 +1003,142 @@
 }
 
 #ifndef PRODUCT
+void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
+    RicochetFrame* rf = new RicochetFrame(*fr);
+
+    // ricochet slots (kept in registers for sparc)
+    values.describe(frame_no, rf->register_addr(I5_savedSP), err_msg("exact_sender_sp reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L5_conversion), err_msg("conversion reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L4_saved_args_base), err_msg("saved_args_base reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L3_saved_args_layout), err_msg("saved_args_layout reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L2_saved_target), err_msg("saved_target reg for #%d", frame_no));
+    values.describe(frame_no, rf->register_addr(L1_continuation), err_msg("continuation reg for #%d", frame_no));
+
+    // relevant ricochet targets (in caller frame)
+    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
+    values.describe(-1, (intptr_t *)(STACK_BIAS+(uintptr_t)rf->exact_sender_sp()),  err_msg("*exact_sender_sp+STACK_BIAS for #%d", frame_no));
+}
+#endif // PRODUCT
+
+#ifndef PRODUCT
 extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
                               oopDesc* mh,
-                              intptr_t* saved_sp) {
+                              intptr_t* saved_sp,
+                              intptr_t* args,
+                              intptr_t* tracing_fp) {
   bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have mh
-  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp);
-  if (has_mh)
+
+  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp, args);
+
+  if (Verbose) {
+    // dumping last frame with frame::describe
+
+    JavaThread* p = JavaThread::active();
+
+    ResourceMark rm;
+    PRESERVE_EXCEPTION_MARK; // may not be needed but safer and inexpensive here
+    FrameValues values;
+
+    // Note: We want to allow trace_method_handle from any call site.
+    // While trace_method_handle creates a frame, it may be entered
+    // without a valid return PC in O7 (e.g. not just after a call).
+    // Walking that frame could lead to failures due to that invalid PC.
+    // => carefully detect that frame when doing the stack walking
+
+    // walk up to the right frame using the "tracing_fp" argument
+    intptr_t* cur_sp = StubRoutines::Sparc::flush_callers_register_windows_func()();
+    frame cur_frame(cur_sp, frame::unpatchable, NULL);
+
+    while (cur_frame.fp() != (intptr_t *)(STACK_BIAS+(uintptr_t)tracing_fp)) {
+      cur_frame = os::get_sender_for_C_frame(&cur_frame);
+    }
+
+    // safely create a frame and call frame::describe
+    intptr_t *dump_sp = cur_frame.sender_sp();
+    intptr_t *dump_fp = cur_frame.link();
+
+    bool walkable = has_mh; // whether the traced frame should be walkable
+
+    // the sender for cur_frame is the caller of trace_method_handle
+    if (walkable) {
+      // The previous definition of walkable may have to be refined
+      // if new call sites cause the next frame constructor to start
+      // failing. Alternatively, frame constructors could be
+      // modified to support the current or future non walkable
+      // frames (but this is more intrusive and is not considered as
+      // part of this RFE, which will instead use a simpler output).
+      frame dump_frame = frame(dump_sp,
+                               cur_frame.sp(), // younger_sp
+                               false); // no adaptation
+      dump_frame.describe(values, 1);
+    } else {
+      // Robust dump for frames which cannot be constructed from sp/younger_sp
+      // Add descriptions without building a Java frame to avoid issues
+      values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
+      values.describe(-1, dump_sp, "sp");
+    }
+
+    bool has_args = has_mh; // whether Gargs is meaningful
+
+    // mark args, if they seem valid (may not be valid for some adapters)
+    if (has_args) {
+      if ((args >= dump_sp) && (args < dump_fp)) {
+        values.describe(-1, args, "*G4_args");
+      }
+    }
+
+    // mark saved_sp, if it seems valid (may not be valid for some adapters)
+    intptr_t *unbiased_sp = (intptr_t *)(STACK_BIAS+(uintptr_t)saved_sp);
+    if ((unbiased_sp >= dump_sp - UNREASONABLE_STACK_MOVE) && (unbiased_sp < dump_fp)) {
+      values.describe(-1, unbiased_sp, "*saved_sp+STACK_BIAS");
+    }
+
+    // Note: the unextended_sp may not be correct
+    tty->print_cr("  stack layout:");
+    values.print(p);
+  }
+
+  if (has_mh) {
     print_method_handle(mh);
+  }
 }
+
 void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
   if (!TraceMethodHandles)  return;
   BLOCK_COMMENT("trace_method_handle {");
   // save: Gargs, O5_savedSP
-  __ save_frame(16);
+  __ save_frame(16); // need space for saving required FPU state
+
   __ set((intptr_t) adaptername, O0);
   __ mov(G3_method_handle, O1);
   __ mov(I5_savedSP, O2);
+  __ mov(Gargs, O3);
+  __ mov(I6, O4); // frame identifier for safe stack walking
+
+  // Save scratched registers that might be needed. Robustness is more
+  // important than optimizing the saves for this debug only code.
+
+  // save FP result, valid at some call sites (adapter_opt_return_float, ...)
+  Address d_save(FP, -sizeof(jdouble) + STACK_BIAS);
+  __ stf(FloatRegisterImpl::D, Ftos_d, d_save);
+  // Safely save all globals but G2 (handled by call_VM_leaf) and G7
+  // (OS reserved).
   __ mov(G3_method_handle, L3);
   __ mov(Gargs, L4);
   __ mov(G5_method_type, L5);
-  __ call_VM_leaf(L7, CAST_FROM_FN_PTR(address, trace_method_handle_stub));
+  __ mov(G6, L6);
+  __ mov(G1, L1);
+
+  __ call_VM_leaf(L2 /* for G2 */, CAST_FROM_FN_PTR(address, trace_method_handle_stub));
 
   __ mov(L3, G3_method_handle);
   __ mov(L4, Gargs);
   __ mov(L5, G5_method_type);
+  __ mov(L6, G6);
+  __ mov(L1, G1);
+  __ ldf(FloatRegisterImpl::D, d_save, Ftos_d);
+
   __ restore();
   BLOCK_COMMENT("} trace_method_handle");
 }
@@ -1038,7 +1158,7 @@
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
           // OP_COLLECT_ARGS is below...
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS)
-         |(!UseRicochetFrames ? 0 :
+         |(
            java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() <= 0 ? 0 :
            ((1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF)
            |(1<<java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS)
@@ -1091,7 +1211,7 @@
   Address G3_amh_argument ( G3_method_handle, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes());
   Address G3_amh_conversion(G3_method_handle, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
 
-  const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+  const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
 
   if (have_entry(ek)) {
     __ nop();  // empty stubs make SG sick
@@ -1243,7 +1363,7 @@
         move_typed_arg(_masm, arg_type, false,
                        prim_value_addr,
                        Address(O0_argslot, 0),
-                       O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
+                      O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
       }
 
       if (direct_to_method) {
--- a/src/cpu/sparc/vm/methodHandles_sparc.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/methodHandles_sparc.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -145,6 +145,8 @@
   }
 
   static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
+
+  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
 };
 
 // Additional helper methods for MethodHandles code generation:
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -321,6 +321,16 @@
   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 }
 
+static VMRegPair reg64_to_VMRegPair(Register r) {
+  VMRegPair ret;
+  if (wordSize == 8) {
+    ret.set2(r->as_VMReg());
+  } else {
+    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
+  }
+  return ret;
+}
+
 // ---------------------------------------------------------------------------
 // Read the array of BasicTypes from a signature, and compute where the
 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
@@ -1444,6 +1454,25 @@
 }
 
 
+static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack
+      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, L5);
+      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
+    } else {
+      // stack to reg
+      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+  } else {
+    __ mov(src.first()->as_Register(), dst.first()->as_Register());
+  }
+}
+
+
 // An oop arg. Must pass a handle not the oop itself
 static void object_move(MacroAssembler* masm,
                         OopMap* map,
@@ -1748,6 +1777,166 @@
   }
 }
 
+
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // if map is non-NULL then the code should store the values,
+  // otherwise it should load them.
+  if (map != NULL) {
+    // Fill in the map
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        if (in_regs[i].first()->is_stack()) {
+          int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+          map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+        } else if (in_regs[i].first()->is_Register()) {
+          map->set_oop(in_regs[i].first());
+        } else {
+          ShouldNotReachHere();
+        }
+      }
+    }
+  }
+
+  // Save or restore double word values
+  int handle_index = 0;
+  for (int i = 0; i < total_in_args; i++) {
+    int slot = handle_index + arg_save_area;
+    int offset = slot * VMRegImpl::stack_slot_size;
+    if (in_sig_bt[i] == T_LONG && in_regs[i].first()->is_Register()) {
+      const Register reg = in_regs[i].first()->as_Register();
+      if (reg->is_global()) {
+        handle_index += 2;
+        assert(handle_index <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ stx(reg, SP, offset + STACK_BIAS);
+        } else {
+          __ ldx(SP, offset + STACK_BIAS, reg);
+        }
+      }
+    } else if (in_sig_bt[i] == T_DOUBLE && in_regs[i].first()->is_FloatRegister()) {
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ stf(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS);
+      } else {
+        __ ldf(FloatRegisterImpl::D, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister());
+      }
+    }
+  }
+  // Save or restore single word (float) values
+  for (int i = 0; i < total_in_args; i++) {
+    int slot = handle_index + arg_save_area;
+    int offset = slot * VMRegImpl::stack_slot_size;
+    if (in_sig_bt[i] == T_FLOAT && in_regs[i].first()->is_FloatRegister()) {
+      handle_index++;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ stf(FloatRegisterImpl::S, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS);
+      } else {
+        __ ldf(FloatRegisterImpl::S, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister());
+      }
+    }
+  }
+
+}
+
+
+// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// keeps a new JNI critical region from starting until a GC has been
+// forced.  Save down any oops in registers and describe them in an
+// OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               const int stack_slots,
+                                               const int total_in_args,
+                                               const int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) {
+  __ block_comment("check GC_locker::needs_gc");
+  Label cont;
+  AddressLiteral sync_state(GC_locker::needs_gc_address());
+  __ load_bool_contents(sync_state, G3_scratch);
+  __ cmp_zero_and_br(Assembler::equal, G3_scratch, cont);
+  __ delayed()->nop();
+
+  // Save down any values that are live in registers and call into the
+  // runtime to halt for a GC
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+
+  __ mov(G2_thread, L7_thread_cache);
+
+  __ set_last_Java_frame(SP, noreg);
+
+  __ block_comment("block_for_jni_critical");
+  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical), relocInfo::runtime_call_type);
+  __ delayed()->mov(L7_thread_cache, O0);
+  oop_maps->add_gc_map( __ offset(), map);
+
+  __ restore_thread(L7_thread_cache); // restore G2_thread
+  __ reset_last_Java_frame();
+
+  // Reload all the register arguments
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+#ifdef ASSERT
+  if (StressCriticalJNINatives) {
+    // Stress register saving
+    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+    // Destroy argument registers
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        if (reg->is_global()) {
+          __ mov(G0, reg);
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        __ fneg(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister());
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+#endif
+}
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
+  // Pass the length, ptr pair
+  Label is_null, done;
+  if (reg.first()->is_stack()) {
+    VMRegPair tmp  = reg64_to_VMRegPair(L2);
+    // Load the arg up from the stack
+    move_ptr(masm, reg, tmp);
+    reg = tmp;
+  }
+  __ cmp(reg.first()->as_Register(), G0);
+  __ brx(Assembler::equal, false, Assembler::pt, is_null);
+  __ delayed()->add(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type), L4);
+  move_ptr(masm, reg64_to_VMRegPair(L4), body_arg);
+  __ ld(reg.first()->as_Register(), arrayOopDesc::length_offset_in_bytes(), L4);
+  move32_64(masm, reg64_to_VMRegPair(L4), length_arg);
+  __ ba_short(done);
+  __ bind(is_null);
+  // Pass zeros
+  move_ptr(masm, reg64_to_VMRegPair(G0), body_arg);
+  move32_64(masm, reg64_to_VMRegPair(G0), length_arg);
+  __ bind(done);
+}
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
@@ -1762,6 +1951,13 @@
                                                 BasicType *in_sig_bt,
                                                 VMRegPair *in_regs,
                                                 BasicType ret_type) {
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
 
   // Native nmethod wrappers never take possesion of the oop arguments.
   // So the caller will gc the arguments. The only thing we need an
@@ -1841,22 +2037,70 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
-  int total_c_args = total_in_args + 1;
-  if (method->is_static()) {
-    total_c_args++;
+  int total_c_args = total_in_args;
+  int total_save_slots = 6 * VMRegImpl::slots_per_word;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // These have to be saved and restored across the safepoint
+        total_c_args++;
+      }
+    }
   }
 
   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair  * out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
 
   int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
-  }
-
-  for (int i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
   }
 
   // Now figure out where the args must be stored and how much stack space
@@ -1866,6 +2110,35 @@
   int out_arg_slots;
   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
 
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_ARRAY:
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  assert(reg->is_in(), "don't need to save these"); break;
+          case T_LONG: if (reg->is_global()) double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        switch (in_sig_bt[i]) {
+          case T_FLOAT:  single_slots++; break;
+          case T_DOUBLE: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+  }
+
   // Compute framesize for the wrapper.  We need to handlize all oops in
   // registers. We must create space for them here that is disjoint from
   // the windowed save area because we have no control over when we might
@@ -1885,12 +2158,11 @@
 
   // Now the space for the inbound oop handle area
 
-  int oop_handle_offset = stack_slots;
-  stack_slots += 6*VMRegImpl::slots_per_word;
+  int oop_handle_offset = round_to(stack_slots, 2);
+  stack_slots += total_save_slots;
 
   // Now any space we need for handlizing a klass if static method
 
-  int oop_temp_slot_offset = 0;
   int klass_slot_offset = 0;
   int klass_offset = -1;
   int lock_slot_offset = 0;
@@ -1954,6 +2226,10 @@
 
   __ verify_thread();
 
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, stack_slots,  total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
 
   //
   // We immediately shuffle the arguments so that any vm call we have to
@@ -1982,7 +2258,6 @@
   // caller.
   //
   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
-  int c_arg = total_c_args - 1;
   // Record sp-based slot for receiver on stack for non-static methods
   int receiver_offset = -1;
 
@@ -2002,7 +2277,7 @@
 
 #endif /* ASSERT */
 
-  for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
+  for ( int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0 ; i--, c_arg-- ) {
 
 #ifdef ASSERT
     if (in_regs[i].first()->is_Register()) {
@@ -2019,7 +2294,13 @@
 
     switch (in_sig_bt[i]) {
       case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg], out_regs[c_arg - 1]);
+          c_arg--;
+          break;
+        }
       case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                     ((i == 0) && (!is_static)),
                     &receiver_offset);
@@ -2029,7 +2310,7 @@
 
       case T_FLOAT:
         float_move(masm, in_regs[i], out_regs[c_arg]);
-          break;
+        break;
 
       case T_DOUBLE:
         assert( i + 1 < total_in_args &&
@@ -2051,7 +2332,7 @@
 
   // Pre-load a static method's oop into O1.  Used both by locking code and
   // the normal JNI call code.
-  if (method->is_static()) {
+  if (method->is_static() && !is_critical_native) {
     __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);
 
     // Now handlize the static class mirror in O1.  It's known not-null.
@@ -2064,13 +2345,13 @@
   const Register L6_handle = L6;
 
   if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
     __ mov(O1, L6_handle);
   }
 
   // We have all of the arguments setup at this point. We MUST NOT touch any Oregs
   // except O6/O7. So if we must call out we must push a new frame. We immediately
   // push a new frame and flush the windows.
-
 #ifdef _LP64
   intptr_t thepc = (intptr_t) __ pc();
   {
@@ -2202,32 +2483,28 @@
   }
 
   // get JNIEnv* which is first argument to native
-
-  __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
+  if (!is_critical_native) {
+    __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
+  }
 
   // Use that pc we placed in O7 a while back as the current frame anchor
-
   __ set_last_Java_frame(SP, O7);
 
+  // We flushed the windows ages ago; now mark them as flushed before transitioning.
+  __ set(JavaFrameAnchor::flushed, G3_scratch);
+  __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
+
   // Transition from _thread_in_Java to _thread_in_native.
   __ set(_thread_in_native, G3_scratch);
-  __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
-
-  // We flushed the windows ages ago now mark them as flushed
-
-  // mark windows as flushed
-  __ set(JavaFrameAnchor::flushed, G3_scratch);
-
-  Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
 
 #ifdef _LP64
-  AddressLiteral dest(method->native_function());
+  AddressLiteral dest(native_func);
   __ relocate(relocInfo::runtime_call_type);
   __ jumpl_to(dest, O7, O7);
 #else
-  __ call(method->native_function(), relocInfo::runtime_call_type);
+  __ call(native_func, relocInfo::runtime_call_type);
 #endif
-  __ delayed()->st(G3_scratch, flags);
+  __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
 
   __ restore_thread(L7_thread_cache); // restore G2_thread
 
@@ -2259,6 +2536,7 @@
     ShouldNotReachHere();
   }
 
+  Label after_transition;
   // must we block?
 
   // Block, if necessary, before resuming in _thread_in_Java state.
@@ -2303,22 +2581,34 @@
     // a distinct one for this pc
     //
     save_native_result(masm, ret_type, stack_slots);
-    __ call_VM_leaf(L7_thread_cache,
-                    CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
-                    G2_thread);
+    if (!is_critical_native) {
+      __ call_VM_leaf(L7_thread_cache,
+                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
+                      G2_thread);
+    } else {
+      __ call_VM_leaf(L7_thread_cache,
+                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition),
+                      G2_thread);
+    }
 
     // Restore any method result value
     restore_native_result(masm, ret_type, stack_slots);
+
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ ba(after_transition);
+      __ delayed()->nop();
+    }
+
     __ bind(no_block);
   }
 
   // thread state is thread_in_native_trans. Any safepoint blocking has already
   // happened so we can now change state to _thread_in_Java.
-
-
   __ set(_thread_in_Java, G3_scratch);
   __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
-
+  __ bind(after_transition);
 
   Label no_reguard;
   __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
@@ -2416,12 +2706,14 @@
       __ verify_oop(I0);
   }
 
-  // reset handle block
-  __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
-  __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
-
-  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
-  check_forward_pending_exception(masm, G3_scratch);
+  if (!is_critical_native) {
+    // reset handle block
+    __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
+    __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
+
+    __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
+    check_forward_pending_exception(masm, G3_scratch);
+  }
 
 
   // Return
@@ -2450,6 +2742,10 @@
                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                             in_ByteSize(lock_offset),
                                             oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
   return nm;
 
 }
@@ -2473,17 +2769,6 @@
 static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
 static bool offsets_initialized = false;
 
-static VMRegPair reg64_to_VMRegPair(Register r) {
-  VMRegPair ret;
-  if (wordSize == 8) {
-    ret.set2(r->as_VMReg());
-  } else {
-    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
-  }
-  return ret;
-}
-
-
 nmethod *SharedRuntime::generate_dtrace_nmethod(
     MacroAssembler *masm, methodHandle method) {
 
@@ -3146,6 +3431,9 @@
   ResourceMark rm;
   // setup code generation tools
   int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
+  if (UseStackBanging) {
+    pad += StackShadowPages*16 + 32;
+  }
 #ifdef _LP64
   CodeBuffer buffer("deopt_blob", 2100+pad, 512);
 #else
@@ -3365,6 +3653,9 @@
   ResourceMark rm;
   // setup code generation tools
   int pad = VerifyThread ? 512 : 0;
+  if (UseStackBanging) {
+    pad += StackShadowPages*16 + 32;
+  }
 #ifdef _LP64
   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
 #else
--- a/src/cpu/sparc/vm/sparc.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/sparc.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -6773,6 +6773,16 @@
   ins_pipe(empty);
 %}
 
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(0);
+
+  size(0);
+  format %{ "!MEMBAR-storestore (empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
 //----------Register Move Instructions-----------------------------------------
 instruct roundDouble_nop(regD dst) %{
   match(Set dst (RoundDouble dst));
@@ -9273,6 +9283,7 @@
 // (compare 'operand indIndex' and 'instruct addP_reg_reg' above)
 instruct jumpXtnd(iRegX switch_val, o7RegI table) %{
   match(Jump switch_val);
+  effect(TEMP table);
 
   ins_cost(350);
 
@@ -10263,24 +10274,24 @@
 // ============================================================================
 // inlined locking and unlocking
 
-instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
+instruct cmpFastLock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
   match(Set pcc (FastLock object box));
 
-  effect(KILL scratch, TEMP scratch2);
+  effect(TEMP scratch2, USE_KILL box, KILL scratch);
   ins_cost(100);
 
-  format %{ "FASTLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
+  format %{ "FASTLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
   ins_encode( Fast_Lock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
 %}
 
 
-instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
+instruct cmpFastUnlock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
   match(Set pcc (FastUnlock object box));
-  effect(KILL scratch, TEMP scratch2);
+  effect(TEMP scratch2, USE_KILL box, KILL scratch);
   ins_cost(100);
 
-  format %{ "FASTUNLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
+  format %{ "FASTUNLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
   ins_encode( Fast_Unlock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
 %}
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -436,7 +436,7 @@
 #undef __
 #define __ masm->
 
-  address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc,
+  address generate_throw_exception(const char* name, address runtime_entry,
                                    Register arg1 = noreg, Register arg2 = noreg) {
 #ifdef ASSERT
     int insts_size = VerifyThread ? 1 * K : 600;
@@ -3046,8 +3046,7 @@
     //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
     //
 
-    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-                    Klass::layout_helper_offset_in_bytes();
+    int lh_offset = in_bytes(Klass::layout_helper_offset());
 
     // Load 32-bits signed value. Use br() instruction with it to check icc.
     __ lduw(G3_src_klass, lh_offset, G5_lh);
@@ -3194,15 +3193,13 @@
                                  G4_dst_klass, G3_src_klass);
 
       // Generate the type check.
-      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                        Klass::super_check_offset_offset_in_bytes());
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
       __ lduw(G4_dst_klass, sco_offset, sco_temp);
       generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
                           O5_temp, L_plain_copy);
 
       // Fetch destination element klass from the objArrayKlass header.
-      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
-                       objArrayKlass::element_klass_offset_in_bytes());
+      int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
 
       // the checkcast_copy loop needs two extra arguments:
       __ ld_ptr(G4_dst_klass, ek_offset, O4);   // dest elem klass
@@ -3413,7 +3410,10 @@
     StubRoutines::_throw_WrongMethodTypeException_entry =
       generate_throw_exception("WrongMethodTypeException throw_exception",
                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
-                               false, G5_method_type, G3_method_handle);
+                               G5_method_type, G3_method_handle);
+
+    // Build this early so it's available for the interpreter.
+    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
   }
 
 
@@ -3427,7 +3427,6 @@
     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
     StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
-    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
 
     StubRoutines::_handler_for_unsafe_access_entry =
       generate_handler_for_unsafe_access();
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -366,7 +366,7 @@
 
   // get synchronization object to O0
   { Label done;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ btst(JVM_ACC_STATIC, O0);
     __ br( Assembler::zero, true, Assembler::pt, done);
     __ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case
@@ -396,7 +396,6 @@
                                                          Register Rscratch,
                                                          Register Rscratch2) {
   const int page_size = os::vm_page_size();
-  Address saved_exception_pc(G2_thread, JavaThread::saved_exception_pc_offset());
   Label after_frame_check;
 
   assert_different_registers(Rframe_size, Rscratch, Rscratch2);
@@ -436,11 +435,19 @@
   // the bottom of the stack
   __ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check);
 
-  // Save the return address as the exception pc
-  __ st_ptr(O7, saved_exception_pc);
+  // the stack will overflow, throw an exception
+
+  // Note that SP is restored to sender's sp (in the delay slot). This
+  // is necessary if the sender's frame is an extended compiled frame
+  // (see gen_c2i_adapter()) and safer anyway in case of JSR292
+  // adaptations.
 
-  // the stack will overflow, throw an exception
-  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
+  // Note also that the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  AddressLiteral stub(StubRoutines::throw_StackOverflowError_entry());
+  __ jump_to(stub, Rscratch);
+  __ delayed()->mov(O5_savedSP, SP);
 
   // if you get to here, then there is enough stack space
   __ bind( after_frame_check );
@@ -984,7 +991,7 @@
     // get native function entry point(O0 is a good temp until the very end)
     __ delayed()->ld_ptr(Lmethod, in_bytes(methodOopDesc::native_function_offset()), O0);
     // for static methods insert the mirror argument
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
 
     __ ld_ptr(Lmethod, methodOopDesc:: constants_offset(), O1);
     __ ld_ptr(O1, constantPoolOopDesc::pool_holder_offset_in_bytes(), O1);
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -888,7 +888,7 @@
 
   // do fast instanceof cache test
 
-  __ ld_ptr(O4,     sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes(),  O4);
+  __ ld_ptr(O4,     in_bytes(objArrayKlass::element_klass_offset()),  O4);
 
   assert(Otos_i == O0, "just checking");
 
@@ -2031,7 +2031,7 @@
     __ access_local_ptr(G3_scratch, Otos_i);
     __ load_klass(Otos_i, O2);
     __ set(JVM_ACC_HAS_FINALIZER, G3);
-    __ ld(O2, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), O2);
+    __ ld(O2, in_bytes(Klass::access_flags_offset()), O2);
     __ andcc(G3, O2, G0);
     Label skip_register_finalizer;
     __ br(Assembler::zero, false, Assembler::pn, skip_register_finalizer);
@@ -3350,13 +3350,13 @@
   __ ld_ptr(Rscratch, Roffset, RinstanceKlass);
 
   // make sure klass is fully initialized:
-  __ ld(RinstanceKlass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_scratch);
+  __ ldub(RinstanceKlass, in_bytes(instanceKlass::init_state_offset()), G3_scratch);
   __ cmp(G3_scratch, instanceKlass::fully_initialized);
   __ br(Assembler::notEqual, false, Assembler::pn, slow_case);
-  __ delayed()->ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset);
+  __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
 
   // get instance_size in instanceKlass (already aligned)
-  //__ ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset);
+  //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset);
 
   // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class
   __ btst(Klass::_lh_instance_slow_path_bit, Roffset);
@@ -3483,7 +3483,7 @@
   __ bind(initialize_header);
 
   if (UseBiasedLocking) {
-    __ ld_ptr(RinstanceKlass, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), G4_scratch);
+    __ ld_ptr(RinstanceKlass, in_bytes(Klass::prototype_header_offset()), G4_scratch);
   } else {
     __ set((intptr_t)markOopDesc::prototype(), G4_scratch);
   }
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -236,6 +236,16 @@
   }
 }
 
+// Force generation of a 4-byte immediate value even if it fits into 8 bits.
+void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
+  assert(isByte(op1) && isByte(op2), "wrong opcode");
+  assert((op1 & 0x01) == 1, "should be 32bit operation");
+  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
+  emit_byte(op1);
+  emit_byte(op2 | encode(dst));
+  emit_long(imm32);
+}
+
 // immediate-to-memory forms
 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
   assert((op1 & 0x01) == 1, "should be 32bit operation");
@@ -533,6 +543,19 @@
 
   case 0x0F: // movx..., etc.
     switch (0xFF & *ip++) {
+    case 0x3A: // pcmpestri
+      tail_size = 1;
+    case 0x38: // ptest, pmovzxbw
+      ip++; // skip opcode
+      debug_only(has_disp32 = true); // has both kinds of operands!
+      break;
+
+    case 0x70: // pshufd r, r/a, #8
+      debug_only(has_disp32 = true); // has both kinds of operands!
+    case 0x73: // psrldq r, #8
+      tail_size = 1;
+      break;
+
     case 0x12: // movlps
     case 0x28: // movaps
     case 0x2E: // ucomiss
@@ -543,9 +566,7 @@
     case 0x57: // xorps
     case 0x6E: // movd
     case 0x7E: // movd
-    case 0xAE: // ldmxcsr   a
-      // 64bit side says it these have both operands but that doesn't
-      // appear to be true
+    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
       debug_only(has_disp32 = true);
       break;
 
@@ -565,6 +586,12 @@
       // fall out of the switch to decode the address
       break;
 
+    case 0xC4: // pinsrw r, a, #8
+      debug_only(has_disp32 = true);
+    case 0xC5: // pextrw r, r, #8
+      tail_size = 1;  // the imm8
+      break;
+
     case 0xAC: // shrd r, a, #8
       debug_only(has_disp32 = true);
       tail_size = 1;  // the imm8
@@ -625,11 +652,44 @@
     tail_size = 1; // the imm8
     break;
 
-  case 0xE8: // call rdisp32
-  case 0xE9: // jmp  rdisp32
-    if (which == end_pc_operand)  return ip + 4;
-    assert(which == call32_operand, "call has no disp32 or imm");
-    return ip;
+  case 0xC4: // VEX_3bytes
+  case 0xC5: // VEX_2bytes
+    assert((UseAVX > 0), "shouldn't have VEX prefix");
+    assert(ip == inst+1, "no prefixes allowed");
+    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
+    // but they have prefix 0x0F and are processed when 0x0F is processed above.
+    //
+    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
+    // instructions (these instructions are not supported in 64-bit mode).
+    // To distinguish them, bits [7:6] are set in the VEX second byte, since
+    // the ModRM byte of LDS/LES cannot be of the form 11xxxxxx in 32-bit mode.
+    // To set those VEX bits, the REX and vvvv bits are inverted.
+    //
+    // Fortunately C2 doesn't generate these instructions so we don't need
+    // to check for them in product version.
+
+    // Check second byte
+    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
+
+    // First byte
+    if ((0xFF & *inst) == VEX_3bytes) {
+      ip++; // third byte
+      is_64bit = ((VEX_W & *ip) == VEX_W);
+    }
+    ip++; // opcode
+    // To find the end of instruction (which == end_pc_operand).
+    switch (0xFF & *ip) {
+    case 0x61: // pcmpestri r, r/a, #8
+    case 0x70: // pshufd r, r/a, #8
+    case 0x73: // psrldq r, #8
+      tail_size = 1;  // the imm8
+      break;
+    default:
+      break;
+    }
+    ip++; // skip opcode
+    debug_only(has_disp32 = true); // has both kinds of operands!
+    break;
 
   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
@@ -643,6 +703,12 @@
     debug_only(has_disp32 = true);
     break;
 
+  case 0xE8: // call rdisp32
+  case 0xE9: // jmp  rdisp32
+    if (which == end_pc_operand)  return ip + 4;
+    assert(which == call32_operand, "call has no disp32 or imm");
+    return ip;
+
   case 0xF0:                    // Lock
     assert(os::is_MP(), "only on MP");
     goto again_after_prefix;
@@ -883,6 +949,7 @@
 }
 
 void Assembler::addr_nop_4() {
+  assert(UseAddressNop, "no CPU support");
   // 4 bytes: NOP DWORD PTR [EAX+0]
   emit_byte(0x0F);
   emit_byte(0x1F);
@@ -891,6 +958,7 @@
 }
 
 void Assembler::addr_nop_5() {
+  assert(UseAddressNop, "no CPU support");
   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
   emit_byte(0x0F);
   emit_byte(0x1F);
@@ -900,6 +968,7 @@
 }
 
 void Assembler::addr_nop_7() {
+  assert(UseAddressNop, "no CPU support");
   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
   emit_byte(0x0F);
   emit_byte(0x1F);
@@ -908,6 +977,7 @@
 }
 
 void Assembler::addr_nop_8() {
+  assert(UseAddressNop, "no CPU support");
   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
   emit_byte(0x0F);
   emit_byte(0x1F);
@@ -918,9 +988,7 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x58);
   emit_byte(0xC0 | encode);
 }
@@ -928,18 +996,14 @@
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x58);
   emit_operand(dst, src);
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x58);
   emit_byte(0xC0 | encode);
 }
@@ -947,13 +1011,19 @@
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x58);
   emit_operand(dst, src);
 }
 
+void Assembler::andl(Address dst, int32_t imm32) {
+  InstructionMark im(this);
+  prefix(dst);
+  emit_byte(0x81);
+  emit_operand(rsp, dst, 4);
+  emit_long(imm32);
+}
+
 void Assembler::andl(Register dst, int32_t imm32) {
   prefix(dst);
   emit_arith(0x81, 0xE0, dst, imm32);
@@ -974,13 +1044,33 @@
 void Assembler::andpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
   emit_byte(0x54);
   emit_operand(dst, src);
 }
 
+void Assembler::andpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x54);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::andps(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
+  emit_byte(0x54);
+  emit_operand(dst, src);
+}
+
+void Assembler::andps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
+  emit_byte(0x54);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   emit_byte(0x0F);
@@ -1025,19 +1115,7 @@
 }
 
 void Assembler::call(Register dst) {
-  // This was originally using a 32bit register encoding
-  // and surely we want 64bit!
-  // this is a 32bit encoding but in 64bit mode the default
-  // operand size is 64bit so there is no need for the
-  // wide prefix. So prefix only happens if we use the
-  // new registers. Much like push/pop.
-  int x = offset();
-  // this may be true but dbx disassembles it as if it
-  // were 32bits...
-  // int encode = prefix_and_encode(dst->encoding());
-  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
-  int encode = prefixq_and_encode(dst->encoding());
-
+  int encode = prefix_and_encode(dst->encoding());
   emit_byte(0xFF);
   emit_byte(0xD0 | encode);
 }
@@ -1157,87 +1235,119 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangly ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  comiss(dst, src);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66);
+  emit_byte(0x2F);
+  emit_operand(dst, src);
+}
+
+void Assembler::comisd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
+  emit_byte(0x2F);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-
-  InstructionMark im(this);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_NONE);
   emit_byte(0x2F);
   emit_operand(dst, src);
 }
 
+void Assembler::comiss(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
+  emit_byte(0x2F);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
   emit_byte(0xE6);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
   emit_byte(0x5B);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
+  emit_byte(0x5A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x2A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
+  emit_byte(0x2A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x2A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
+  emit_byte(0x2A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5A);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::cvtss2sd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
+  emit_byte(0x5A);
+  emit_operand(dst, src);
+}
+
+
 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
   emit_byte(0x2C);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
   emit_byte(0x2C);
   emit_byte(0xC0 | encode);
 }
@@ -1253,18 +1363,14 @@
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5E);
   emit_operand(dst, src);
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5E);
   emit_byte(0xC0 | encode);
 }
@@ -1272,18 +1378,14 @@
 void Assembler::divss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5E);
   emit_operand(dst, src);
 }
 
 void Assembler::divss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5E);
   emit_byte(0xC0 | encode);
 }
@@ -1377,8 +1479,14 @@
   if (L.is_bound()) {
     const int short_size = 2;
     address entry = target(L);
-    assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
-           "Dispacement too large for a short jmp");
+#ifdef ASSERT
+    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+    intptr_t delta = short_branch_delta();
+    if (delta != 0) {
+      dist += (dist < 0 ? (-delta) :delta);
+    }
+    assert(is8bit(dist), "Dispacement too large for a short jmp");
+#endif
     intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
     // 0111 tttn #8-bit disp
     emit_byte(0x70 | cc);
@@ -1444,9 +1552,15 @@
   if (L.is_bound()) {
     const int short_size = 2;
     address entry = target(L);
-    assert(is8bit((entry - _code_pos) + short_size),
-           "Dispacement too large for a short jmp");
     assert(entry != NULL, "jmp most probably wrong");
+#ifdef ASSERT
+    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
+    intptr_t delta = short_branch_delta();
+    if (delta != 0) {
+      dist += (dist < 0 ? (-delta) :delta);
+    }
+    assert(is8bit(dist), "Dispacement too large for a short jmp");
+#endif
     intptr_t offs = entry - _code_pos;
     emit_byte(0xEB);
     emit_byte((offs - short_size) & 0xFF);
@@ -1509,49 +1623,16 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int dstenc = dst->encoding();
-  int srcenc = src->encoding();
-  emit_byte(0x66);
-  if (dstenc < 8) {
-    if (srcenc >= 8) {
-      prefix(REX_B);
-      srcenc -= 8;
-    }
-  } else {
-    if (srcenc < 8) {
-      prefix(REX_R);
-    } else {
-      prefix(REX_RB);
-      srcenc -= 8;
-    }
-    dstenc -= 8;
-  }
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
   emit_byte(0x28);
-  emit_byte(0xC0 | dstenc << 3 | srcenc);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int dstenc = dst->encoding();
-  int srcenc = src->encoding();
-  if (dstenc < 8) {
-    if (srcenc >= 8) {
-      prefix(REX_B);
-      srcenc -= 8;
-    }
-  } else {
-    if (srcenc < 8) {
-      prefix(REX_R);
-    } else {
-      prefix(REX_RB);
-      srcenc -= 8;
-    }
-    dstenc -= 8;
-  }
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
   emit_byte(0x28);
-  emit_byte(0xC0 | dstenc << 3 | srcenc);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::movb(Register dst, Address src) {
@@ -1582,19 +1663,15 @@
 
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
   emit_byte(0x6E);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
   // swap src/dst to get correct prefix
-  int encode = prefix_and_encode(src->encoding(), dst->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
   emit_byte(0x7E);
   emit_byte(0xC0 | encode);
 }
@@ -1602,58 +1679,29 @@
 void Assembler::movdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_66);
   emit_byte(0x6E);
   emit_operand(dst, src);
 }
 
-
-void Assembler::movdqa(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x6F);
-  emit_operand(dst, src);
-}
-
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
   emit_byte(0x6F);
   emit_byte(0xC0 | encode);
 }
 
-void Assembler::movdqa(Address dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(dst, src);
-  emit_byte(0x0F);
-  emit_byte(0x7F);
-  emit_operand(src, dst);
-}
-
 void Assembler::movdqu(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
   emit_byte(0x6F);
   emit_byte(0xC0 | encode);
 }
@@ -1661,9 +1709,7 @@
 void Assembler::movdqu(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(dst, src);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x7F);
   emit_operand(src, dst);
 }
@@ -1710,9 +1756,7 @@
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
   emit_byte(0x12);
   emit_operand(dst, src);
 }
@@ -1740,9 +1784,7 @@
 void Assembler::movq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x7E);
   emit_operand(dst, src);
 }
@@ -1750,9 +1792,7 @@
 void Assembler::movq(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(dst, src);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_66);
   emit_byte(0xD6);
   emit_operand(src, dst);
 }
@@ -1775,9 +1815,7 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x10);
   emit_byte(0xC0 | encode);
 }
@@ -1785,9 +1823,7 @@
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F2);
   emit_byte(0x10);
   emit_operand(dst, src);
 }
@@ -1795,18 +1831,14 @@
 void Assembler::movsd(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(dst, src);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F2);
   emit_byte(0x11);
   emit_operand(src, dst);
 }
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x10);
   emit_byte(0xC0 | encode);
 }
@@ -1814,9 +1846,7 @@
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x10);
   emit_operand(dst, src);
 }
@@ -1824,9 +1854,7 @@
 void Assembler::movss(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(dst, src);
-  emit_byte(0x0F);
+  simd_prefix(dst, src, VEX_SIMD_F3);
   emit_byte(0x11);
   emit_operand(src, dst);
 }
@@ -1919,18 +1947,14 @@
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x59);
   emit_operand(dst, src);
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x59);
   emit_byte(0xC0 | encode);
 }
@@ -1938,18 +1962,14 @@
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x59);
   emit_operand(dst, src);
 }
 
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x59);
   emit_byte(0xC0 | encode);
 }
@@ -2237,14 +2257,26 @@
   emit_arith(0x0B, 0xC0, dst, src);
 }
 
+void Assembler::packuswb(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x67);
+  emit_operand(dst, src);
+}
+
+void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x67);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x3A);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
   emit_byte(0x61);
   emit_operand(dst, src);
   emit_byte(imm8);
@@ -2252,16 +2284,27 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-
-  emit_byte(0x66);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x3A);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
   emit_byte(0x61);
   emit_byte(0xC0 | encode);
   emit_byte(imm8);
 }
 
+void Assembler::pmovzxbw(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_sse4_1(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x30);
+  emit_operand(dst, src);
+}
+
+void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x30);
+  emit_byte(0xC0 | encode);
+}
+
 // generic
 void Assembler::pop(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
@@ -2360,22 +2403,24 @@
 
 void Assembler::por(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  emit_byte(0x66);
-  int  encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
   emit_byte(0xEB);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::por(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0xEB);
+  emit_operand(dst, src);
+}
+
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  emit_byte(0x66);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
   emit_byte(0x70);
   emit_byte(0xC0 | encode);
   emit_byte(mode & 0xFF);
@@ -2385,11 +2430,9 @@
 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66);
   emit_byte(0x70);
   emit_operand(dst, src);
   emit_byte(mode & 0xFF);
@@ -2398,10 +2441,7 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
   emit_byte(0x70);
   emit_byte(0xC0 | encode);
   emit_byte(mode & 0xFF);
@@ -2410,11 +2450,9 @@
 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst); // QQ new
-  emit_byte(0x0F);
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_F2);
   emit_byte(0x70);
   emit_operand(dst, src);
   emit_byte(mode & 0xFF);
@@ -2425,11 +2463,8 @@
   // HMM Table D-1 says sse2 or mmx.
   // Do not confuse it with psrldq SSE2 instruction which
   // shifts 128 bit value in xmm register by number of bytes.
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-
-  int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
-  emit_byte(0x66);
-  emit_byte(0x0F);
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
   emit_byte(0x73);
   emit_byte(0xC0 | encode);
   emit_byte(shift);
@@ -2438,10 +2473,7 @@
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-
-  int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
-  emit_byte(0x66);
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
   emit_byte(0x73);
   emit_byte(0xC0 | encode);
   emit_byte(shift);
@@ -2449,36 +2481,52 @@
 
 void Assembler::ptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
-
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
-  emit_byte(0x38);
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-
-  emit_byte(0x66);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
-  emit_byte(0x38);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x17);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::punpcklbw(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x60);
+  emit_operand(dst, src);
+}
+
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
   emit_byte(0x60);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::punpckldq(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x62);
+  emit_operand(dst, src);
+}
+
+void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x62);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::push(int32_t imm32) {
   // in 64bits we push 64bits onto the stack but only
   // take a 32bit immediate
@@ -2508,20 +2556,16 @@
 
 void Assembler::pxor(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
   emit_byte(0xEF);
   emit_operand(dst, src);
 }
 
 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0x66);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
   emit_byte(0xEF);
   emit_byte(0xC0 | encode);
 }
@@ -2683,12 +2727,8 @@
 }
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
-  // HMM Table D-1 says sse2
-  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x51);
   emit_byte(0xC0 | encode);
 }
@@ -2696,30 +2736,22 @@
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x51);
   emit_operand(dst, src);
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
-  // HMM Table D-1 says sse2
-  // NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x51);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x51);
   emit_operand(dst, src);
 }
@@ -2751,6 +2783,12 @@
   emit_arith(0x81, 0xE8, dst, imm32);
 }
 
+// Force generation of a 4 byte immediate value even if it fits into 8bit
+void Assembler::subl_imm32(Register dst, int32_t imm32) {
+  prefix(dst);
+  emit_arith_imm32(0x81, 0xE8, dst, imm32);
+}
+
 void Assembler::subl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
@@ -2765,9 +2803,7 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5C);
   emit_byte(0xC0 | encode);
 }
@@ -2775,18 +2811,14 @@
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0xF2);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x5C);
   emit_operand(dst, src);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5C);
   emit_byte(0xC0 | encode);
 }
@@ -2794,9 +2826,7 @@
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  emit_byte(0xF3);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x5C);
   emit_operand(dst, src);
 }
@@ -2836,30 +2866,30 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  ucomiss(dst, src);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66);
+  emit_byte(0x2E);
+  emit_operand(dst, src);
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  ucomiss(dst, src);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
+  emit_byte(0x2E);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-
-  InstructionMark im(this);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_NONE);
   emit_byte(0x2E);
   emit_operand(dst, src);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
   emit_byte(0x2E);
   emit_byte(0xC0 | encode);
 }
@@ -2905,16 +2935,15 @@
 
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0x66);
-  xorps(dst, src);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
+  emit_byte(0x57);
+  emit_byte(0xC0 | encode);
 }
 
 void Assembler::xorpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  emit_byte(0x66);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_66);
   emit_byte(0x57);
   emit_operand(dst, src);
 }
@@ -2922,8 +2951,7 @@
 
 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
   emit_byte(0x57);
   emit_byte(0xC0 | encode);
 }
@@ -2931,12 +2959,166 @@
 void Assembler::xorps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionMark im(this);
-  prefix(src, dst);
-  emit_byte(0x0F);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
   emit_byte(0x57);
   emit_operand(dst, src);
 }
 
+// AVX 3-operands non destructive source instructions (encoded with VEX prefix)
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x58);
+  emit_operand(dst, src);
+}
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x58);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x58);
+  emit_operand(dst, src);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x58);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
+  emit_byte(0x54);
+  emit_operand(dst, src);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
+  emit_byte(0x54);
+  emit_operand(dst, src);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x5E);
+  emit_operand(dst, src);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x5E);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x5E);
+  emit_operand(dst, src);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x5E);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x59);
+  emit_operand(dst, src);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x59);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {  // memory-source form
+  assert(VM_Version::supports_avx(), "");  // VEX-only encoding: guard it like every other v* emitter here
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F3);  // F3 SIMD prefix; 'nds' is the extra non-destructive source
+  emit_byte(0x59);  // opcode byte shared with mulss
+  emit_operand(dst, src);  // presumably emits ModRM/SIB/displacement for 'src' - see emit_operand
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {  // register-source form
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x59);  // opcode byte shared with mulss
+  emit_byte(0xC0 | encode);  // register-direct ModRM: 0xC0 plus the reg/rm bits returned above
+}
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x5C);
+  emit_operand(dst, src);
+}
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
+  emit_byte(0x5C);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x5C);
+  emit_operand(dst, src);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
+  emit_byte(0x5C);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
+  emit_byte(0x57);
+  emit_operand(dst, src);
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
+  emit_byte(0x57);
+  emit_operand(dst, src);
+}
+
+
 #ifndef _LP64
 // 32bit only pieces of the assembler
 
@@ -3394,12 +3576,114 @@
   emit_byte(0xF1);
 }
 
+// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding (entry 0 = no prefix byte). Read-only.
+static const int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
+// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding (0 = no second byte). Read-only.
+static const int simd_opc[4] = { 0,    0, 0x38, 0x3A };
+
+// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding (memory-operand form; emits nothing but prefix/escape bytes).
+void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
+  if (pre > 0) {  // a zero 'pre' means no SIMD prefix byte is emitted
+    emit_byte(simd_pre[pre]);  // 0x66, 0xF3 or 0xF2, taken from the table above
+  }
+  if (rex_w) {  // rex_w selects the prefixq() variant for the address/register pair
+    prefixq(adr, xreg);
+  } else {
+    prefix(adr, xreg);
+  }
+  if (opc > 0) {  // a zero 'opc' emits no opcode escape at all
+    emit_byte(0x0F);  // first escape byte, common to every non-zero 'opc'
+    int opc2 = simd_opc[opc];  // 0x38 or 0x3A for the three-byte forms, 0 when 0x0F alone suffices
+    if (opc2 > 0) {
+      emit_byte(opc2);
+    }
+  }
+}
+
+int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {  // register-register counterpart of rex_prefix(Address, ...)
+  if (pre > 0) {  // a zero 'pre' means no SIMD prefix byte is emitted
+    emit_byte(simd_pre[pre]);
+  }
+  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :  // rex_w selects the 'q' helper
+                          prefix_and_encode(dst_enc, src_enc);
+  if (opc > 0) {  // opcode escape bytes follow the prefix emitted by the helper above
+    emit_byte(0x0F);
+    int opc2 = simd_opc[opc];  // 0 means there is no second escape byte
+    if (opc2 > 0) {
+      emit_byte(opc2);
+    }
+  }
+  return encode;  // value from the helper; callers in this file OR it into 0xC0 to form the ModRM byte
+}
+
+
+void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {  // emit a 2- or 3-byte VEX prefix
+  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {  // the 2-byte form below encodes none of these
+    prefix(VEX_3bytes);
+
+    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
+    byte1 = (~byte1) & 0xE0;  // requested R/X/B bits are stored inverted; mask keeps only the top three bits
+    byte1 |= opc;  // the VexOpcode value goes into the low bits unchanged
+    a_byte(byte1);  // NOTE(review): a_byte() here while every other byte uses emit_byte() - confirm intentional
+
+    int byte2 = ((~nds_enc) & 0xf) << 3;  // 4-bit 'nds' register number, stored inverted
+    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;  // W flag, 256-bit flag (value 4), SIMD prefix selector
+    emit_byte(byte2);
+  } else {
+    prefix(VEX_2bytes);
+
+    int byte1 = vex_r ? VEX_R : 0;
+    byte1 = (~byte1) & 0x80;  // only the inverted R bit survives in the 2-byte form
+    byte1 |= ((~nds_enc) & 0xf) << 3;  // 4-bit 'nds' register number, stored inverted
+    byte1 |= (vector256 ? 4 : 0) | pre;  // 256-bit flag (value 4) and SIMD prefix selector
+    emit_byte(byte1);
+  }
+}
+
+void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){  // memory-operand form: derive R/B/X, then delegate
+  bool vex_r = (xreg_enc >= 8);  // register numbers 8 and above need the R extension bit
+  bool vex_b = adr.base_needs_rex();  // B is set when the address reports that its base needs REX
+  bool vex_x = adr.index_needs_rex();  // X is set when the address reports that its index needs REX
+  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
+}
+
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {  // register-register form
+  bool vex_r = (dst_enc >= 8);  // destination numbers 8 and above need the R extension bit
+  bool vex_b = (src_enc >= 8);  // source numbers 8 and above need the B extension bit
+  bool vex_x = false;  // no index register in the register-register form
+  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
+  return (((dst_enc & 7) << 3) | (src_enc & 7));  // low three bits of each register: dst in bits 5..3, src in bits 2..0
+}
+
+
+void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {  // memory-operand prefix: VEX when UseAVX > 0, legacy SSE otherwise
+  if (UseAVX > 0) {
+    int xreg_enc = xreg->encoding();
+    int  nds_enc = nds->is_valid() ? nds->encoding() : 0;  // an invalid 'nds' is encoded as register 0
+    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);  // rex_w is passed in the vex_w position
+  } else {
+    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");  // legacy path has no separate 'nds' operand
+    rex_prefix(adr, xreg, pre, opc, rex_w);  // 'vector256' is not used on this path
+  }
+}
+
+int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {  // register-register prefix: VEX when UseAVX > 0, legacy SSE otherwise
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if (UseAVX > 0) {
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;  // an invalid 'nds' is encoded as register 0
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);  // rex_w is passed in the vex_w position
+  } else {
+    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");  // legacy path has no separate 'nds' operand
+    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);  // 'vector256' is not used on this path
+  }
+}
 
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
   // Don't use it directly. Use MacroAssembler::incrementl() instead.
- emit_byte(0x40 | dst->encoding());
+  emit_byte(0x40 | dst->encoding());
 }
 
 void Assembler::lea(Register dst, Address src) {
@@ -3756,6 +4040,38 @@
   }
 }
 
+// Select and emit one of the REX_W* prefixes for XMM register 'src' used
+// together with memory operand 'adr'.
+void Assembler::prefixq(Address adr, XMMRegister src) {
+  // R variant for register encodings 8 and above; B / X variants when the
+  // address reports that its base / index needs a REX bit.
+  const bool needs_r = (src->encoding() >= 8);
+  const bool needs_b = adr.base_needs_rex();
+  const bool needs_x = adr.index_needs_rex();
+  if (needs_r) {
+    prefix(needs_b ? (needs_x ? REX_WRXB : REX_WRB)
+                   : (needs_x ? REX_WRX  : REX_WR));
+  } else {
+    prefix(needs_b ? (needs_x ? REX_WXB : REX_WB)
+                   : (needs_x ? REX_WX  : REX_W));
+  }
+}
+
 void Assembler::adcq(Register dst, int32_t imm32) {
   (void) prefixq_and_encode(dst->encoding());
   emit_arith(0x81, 0xD0, dst, imm32);
@@ -3918,36 +4234,44 @@
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  // The _q helper sets rex_w and uses the 0F opcode map; dst is passed as
+  // both the destination and the nds operand.
+  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
   emit_byte(0x2A);
   emit_byte(0xC0 | encode);
 }
 
+// Memory-source form of cvtsi2sdq: emits the F2 SIMD prefix with rex_w set
+// (via simd_prefix_q), the opcode byte 0x2A, then the operand for (dst, src).
+void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  // dst is passed as both the destination and the nds operand.
+  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
+  emit_byte(0x2A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  // The _q helper sets rex_w and uses the 0F opcode map; dst is passed as
+  // both the destination and the nds operand.
+  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
   emit_byte(0x2A);
   emit_byte(0xC0 | encode);
 }
 
+// Memory-source form of cvtsi2ssq: emits the F3 SIMD prefix with rex_w set
+// (via simd_prefix_q), the opcode byte 0x2A, then the operand for (dst, src).
+void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  InstructionMark im(this);
+  // dst is passed as both the destination and the nds operand.
+  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
+  emit_byte(0x2A);
+  emit_operand(dst, src);
+}
+
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_byte(0xF2);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  // General-register destination: the helper reinterprets dst's encoding as
+  // an XMM register, passes no nds operand (xnoreg) and sets rex_w.
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
   emit_byte(0x2C);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_byte(0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  // General-register destination: the helper reinterprets dst's encoding as
+  // an XMM register, passes no nds operand (xnoreg) and sets rex_w.
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
   emit_byte(0x2C);
   emit_byte(0xC0 | encode);
 }
@@ -4107,21 +4431,17 @@
 
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
-  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
-  emit_byte(0x66);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_byte(0x0F);
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // 66-prefixed form with rex_w set and no nds operand; opcode 0x6E follows.
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
   emit_byte(0x6E);
   emit_byte(0xC0 | encode);
 }
 
 void Assembler::movdq(Register dst, XMMRegister src) {
   // table D-1 says MMX/SSE2
-  NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
-  emit_byte(0x66);
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
-  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
-  emit_byte(0x0F);
+  // (the XMM register goes in the first argument slot, the general register
+  // in the second; opcode 0x7E follows)
+  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
   emit_byte(0x7E);
   emit_byte(0xC0 | encode);
 }
@@ -4460,6 +4780,12 @@
   emit_arith(0x81, 0xE8, dst, imm32);
 }
 
+// Force generation of a 4 byte immediate value even if it fits into 8bit
+// Emits the prefix from prefixq_and_encode() for dst, then the 0x81 / 0xE8
+// arithmetic form through emit_arith_imm32() with imm32.
+void Assembler::subq_imm32(Register dst, int32_t imm32) {
+  // Only the emitted prefix is needed; the returned encoding is discarded.
+  (void) prefixq_and_encode(dst->encoding());
+  emit_arith_imm32(0x81, 0xE8, dst, imm32);
+}
+
 void Assembler::subq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
@@ -4632,7 +4958,7 @@
     null_check_offset = offset();
   }
   movl(tmp_reg, klass_addr);
-  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
   andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
   if (need_tmp_reg) {
     pop(tmp_reg);
@@ -4719,7 +5045,7 @@
   }
   get_thread(tmp_reg);
   movl(swap_reg, klass_addr);
-  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
   movl(swap_reg, saved_mark_addr);
   if (os::is_MP()) {
     lock();
@@ -4757,7 +5083,7 @@
     push(tmp_reg);
   }
   movl(tmp_reg, klass_addr);
-  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
   if (os::is_MP()) {
     lock();
   }
@@ -4801,15 +5127,6 @@
   }
 }
 
-void MacroAssembler::fat_nop() {
-  // A 5 byte nop that is safe for patching (see patch_verified_entry)
-  emit_byte(0x26); // es:
-  emit_byte(0x2e); // cs:
-  emit_byte(0x64); // fs:
-  emit_byte(0x65); // gs:
-  emit_byte(0x90);
-}
-
 void MacroAssembler::jC2(Register tmp, Label& L) {
   // set parity bit if FPU flag C2 is set (via rax)
   save_rax(tmp);
@@ -5404,17 +5721,6 @@
   /* else */      { subq(dst, value)       ; return; }
 }
 
-void MacroAssembler::fat_nop() {
-  // A 5 byte nop that is safe for patching (see patch_verified_entry)
-  // Recommened sequence from 'Software Optimization Guide for the AMD
-  // Hammer Processor'
-  emit_byte(0x66);
-  emit_byte(0x66);
-  emit_byte(0x90);
-  emit_byte(0x66);
-  emit_byte(0x90);
-}
-
 void MacroAssembler::incrementq(Register reg, int value) {
   if (value == min_jint) { addq(reg, value); return; }
   if (value <  0) { decrementq(reg, -value); return; }
@@ -5680,6 +5986,24 @@
   LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
 }
 
+// addsd with an AddressLiteral source operand.
+void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::addsd(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::addsd(dst, Address(rscratch1, 0));
+}
+
+// addss with an AddressLiteral source operand.
+// The Address forms are called with an explicit Assembler:: qualifier, as
+// addsd and the other SSE AddressLiteral wrappers in this file do, so the
+// call cannot resolve back to a MacroAssembler overload.
+void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    // The literal is used as the memory operand directly.
+    Assembler::addss(dst, as_Address(src));
+  } else {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    Assembler::addss(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::align(int modulus) {
   if (offset() % modulus != 0) {
     nop(modulus - (offset() % modulus));
@@ -5687,11 +6011,24 @@
 }
 
 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-masking with aligned address.
+  // When UseAVX is 0 the literal's target must have its low four address
+  // bits clear (16-byte alignment); with UseAVX > 0 this is not checked.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   if (reachable(src)) {
-    andpd(dst, as_Address(src));
+    Assembler::andpd(dst, as_Address(src));
   } else {
+    // Load the literal's address into rscratch1 and address through it.
     lea(rscratch1, src);
-    andpd(dst, Address(rscratch1, 0));
+    Assembler::andpd(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-masking with aligned address.
+  // When UseAVX is 0 the literal's target must have its low four address
+  // bits clear (16-byte alignment); with UseAVX > 0 this is not checked.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::andps(dst, as_Address(src));
+  } else {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    Assembler::andps(dst, Address(rscratch1, 0));
   }
 }
 
@@ -5968,7 +6305,9 @@
   assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
   LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
 #ifdef ASSERT
-  LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");)
+  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
+  // r12 is the heapbase.
+  LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base");)
 #endif // ASSERT
 
   assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
@@ -6268,19 +6607,19 @@
 
 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
+  // The Address forms are called with an explicit Assembler:: qualifier.
   if (reachable(src)) {
-    comisd(dst, as_Address(src));
+    Assembler::comisd(dst, as_Address(src));
   } else {
+    // Load the literal's address into rscratch1 and address through it.
     lea(rscratch1, src);
-    comisd(dst, Address(rscratch1, 0));
+    Assembler::comisd(dst, Address(rscratch1, 0));
   }
 }
 
 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
+  // The Address forms are called with an explicit Assembler:: qualifier.
   if (reachable(src)) {
-    comiss(dst, as_Address(src));
+    Assembler::comiss(dst, as_Address(src));
   } else {
+    // Load the literal's address into rscratch1 and address through it.
     lea(rscratch1, src);
-    comiss(dst, Address(rscratch1, 0));
+    Assembler::comiss(dst, Address(rscratch1, 0));
   }
 }
 
@@ -6364,6 +6703,24 @@
   sarl(reg, shift_value);
 }
 
+// divsd with an AddressLiteral source operand.
+void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::divsd(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::divsd(dst, Address(rscratch1, 0));
+}
+
+// divss with an AddressLiteral source operand.
+void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::divss(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::divss(dst, Address(rscratch1, 0));
+}
+
 // !defined(COMPILER2) is because of stupid core builds
 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
 void MacroAssembler::empty_FPU_stack() {
@@ -6415,6 +6772,19 @@
   mov(rbp, rsp);
 }
 
+// A 5 byte nop that is safe for patching (see patch_verified_entry)
+void MacroAssembler::fat_nop() {
+  // With UseAddressNop the nop comes from addr_nop_5().
+  if (UseAddressNop) {
+    addr_nop_5();
+    return;
+  }
+  // Otherwise: four segment-override prefix bytes followed by 0x90.
+  emit_byte(0x26); // es:
+  emit_byte(0x2e); // cs:
+  emit_byte(0x64); // fs:
+  emit_byte(0x65); // gs:
+  emit_byte(0x90);
+}
+
 void MacroAssembler::fcmp(Register tmp) {
   fcmp(tmp, 1, true, true);
 }
@@ -6803,12 +7173,39 @@
   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
 }
 
+// movsd with an AddressLiteral source operand.
+void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::movsd(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::movsd(dst, Address(rscratch1, 0));
+}
+
 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
+  // The Address forms are called with an explicit Assembler:: qualifier.
   if (reachable(src)) {
-    movss(dst, as_Address(src));
+    Assembler::movss(dst, as_Address(src));
   } else {
+    // Load the literal's address into rscratch1 and address through it.
     lea(rscratch1, src);
-    movss(dst, Address(rscratch1, 0));
+    Assembler::movss(dst, Address(rscratch1, 0));
+  }
+}
+
+// mulsd with an AddressLiteral source operand.
+void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::mulsd(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::mulsd(dst, Address(rscratch1, 0));
+}
+
+void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::mulss(dst, as_Address(src));
+  } else {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    Assembler::mulss(dst, Address(rscratch1, 0));
   }
 }
 
@@ -6990,6 +7387,193 @@
   testl(dst, as_Address(src));
 }
 
+// sqrtsd with an AddressLiteral source operand.
+void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::sqrtsd(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::sqrtsd(dst, Address(rscratch1, 0));
+}
+
+// sqrtss with an AddressLiteral source operand.
+void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::sqrtss(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::sqrtss(dst, Address(rscratch1, 0));
+}
+
+// subsd with an AddressLiteral source operand.
+void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::subsd(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::subsd(dst, Address(rscratch1, 0));
+}
+
+// subss with an AddressLiteral source operand.
+void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::subss(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::subss(dst, Address(rscratch1, 0));
+}
+
+// ucomisd with an AddressLiteral source operand.
+void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::ucomisd(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::ucomisd(dst, Address(rscratch1, 0));
+}
+
+// ucomiss with an AddressLiteral source operand.
+void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
+  // When reachable(src) holds, the literal is used as the memory operand.
+  if (reachable(src)) {
+    Assembler::ucomiss(dst, as_Address(src));
+    return;
+  }
+  // Otherwise load its address into rscratch1 and address through that.
+  lea(rscratch1, src);
+  Assembler::ucomiss(dst, Address(rscratch1, 0));
+}
+
+// xorpd with an AddressLiteral source operand.
+void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-bit flipping with aligned address.
+  // With UseAVX == 0 the target's low four address bits must be clear.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::xorpd(dst, as_Address(src));
+    return;
+  }
+  // Not reachable: load the address into rscratch1 and address through it.
+  lea(rscratch1, src);
+  Assembler::xorpd(dst, Address(rscratch1, 0));
+}
+
+// xorps with an AddressLiteral source operand.
+void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-bit flipping with aligned address.
+  // With UseAVX == 0 the target's low four address bits must be clear.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::xorps(dst, as_Address(src));
+    return;
+  }
+  // Not reachable: load the address into rscratch1 and address through it.
+  lea(rscratch1, src);
+  Assembler::xorps(dst, Address(rscratch1, 0));
+}
+
+// AVX 3-operands instructions
+
+// Three-operand vaddsd with an AddressLiteral source operand.
+void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vaddsd(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vaddsd(dst, nds, as_Address(src));
+}
+
+// Three-operand vaddss with an AddressLiteral source operand.
+void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vaddss(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vaddss(dst, nds, as_Address(src));
+}
+
+// Three-operand vandpd with an AddressLiteral source operand.
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vandpd(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vandpd(dst, nds, as_Address(src));
+}
+
+// Three-operand vandps with an AddressLiteral source operand.
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vandps(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vandps(dst, nds, as_Address(src));
+}
+
+// Three-operand vdivsd with an AddressLiteral source operand.
+void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vdivsd(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vdivsd(dst, nds, as_Address(src));
+}
+
+// Three-operand vdivss with an AddressLiteral source operand.
+void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vdivss(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vdivss(dst, nds, as_Address(src));
+}
+
+// Three-operand vmulsd with an AddressLiteral source operand.
+void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vmulsd(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vmulsd(dst, nds, as_Address(src));
+}
+
+// Three-operand vmulss with an AddressLiteral source operand.
+void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vmulss(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vmulss(dst, nds, as_Address(src));
+}
+
+// Three-operand vsubsd with an AddressLiteral source operand.
+void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vsubsd(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vsubsd(dst, nds, as_Address(src));
+}
+
+// Three-operand vsubss with an AddressLiteral source operand.
+void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vsubss(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vsubss(dst, nds, as_Address(src));
+}
+
+// Three-operand vxorpd with an AddressLiteral source operand.
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vxorpd(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vxorpd(dst, nds, as_Address(src));
+}
+
+// Three-operand vxorps with an AddressLiteral source operand.
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+  if (!reachable(src)) {
+    // Load the literal's address into rscratch1 and address through it.
+    lea(rscratch1, src);
+    vxorps(dst, nds, Address(rscratch1, 0));
+    return;
+  }
+  vxorps(dst, nds, as_Address(src));
+}
+
+
 //////////////////////////////////////////////////////////////////////////////////
 #ifndef SERIALGC
 
@@ -7260,6 +7844,11 @@
   LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
 }
 
+// Force generation of a 4 byte immediate value even if it fits into 8bit
+// Pointer-width subtract: expands to subq_imm32 under LP64_ONLY and to
+// subl_imm32 under NOT_LP64.
+void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
+  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
+}
+
 void MacroAssembler::subptr(Register dst, Register src) {
   LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
 }
@@ -7428,6 +8017,16 @@
                                           Register var_size_in_bytes,
                                           int con_size_in_bytes,
                                           Register t1) {
+  if (!thread->is_valid()) {
+#ifdef _LP64
+    thread = r15_thread;
+#else
+    assert(t1->is_valid(), "need temp reg");
+    thread = t1;
+    get_thread(thread);
+#endif
+  }
+
 #ifdef _LP64
   if (var_size_in_bytes->is_valid()) {
     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
@@ -7435,12 +8034,6 @@
     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
   }
 #else
-  if (!thread->is_valid()) {
-    assert(t1->is_valid(), "need temp reg");
-    thread = t1;
-    get_thread(thread);
-  }
-
   if (var_size_in_bytes->is_valid()) {
     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
   } else {
@@ -7683,10 +8276,8 @@
   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
   assert(label_nulls <= 1, "at most one NULL in the batch");
 
-  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_super_cache_offset_in_bytes());
-  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                    Klass::super_check_offset_offset_in_bytes());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
   Address super_check_offset_addr(super_klass, sco_offset);
 
   // Hacked jcc, which "knows" that L_fallthrough, at least, is in
@@ -7784,10 +8375,8 @@
   assert(label_nulls <= 1, "at most one NULL in the batch");
 
   // a couple of useful fields in sub_klass:
-  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_supers_offset_in_bytes());
-  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                   Klass::secondary_super_cache_offset_in_bytes());
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
   Address secondary_supers_addr(sub_klass, ss_offset);
   Address super_cache_addr(     sub_klass, sc_offset);
 
@@ -7874,32 +8463,6 @@
 }
 
 
-void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
-  ucomisd(dst, as_Address(src));
-}
-
-void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
-  ucomiss(dst, as_Address(src));
-}
-
-void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    xorpd(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    xorpd(dst, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
-  if (reachable(src)) {
-    xorps(dst, as_Address(src));
-  } else {
-    lea(rscratch1, src);
-    xorps(dst, Address(rscratch1, 0));
-  }
-}
-
 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
   if (VM_Version::supports_cmov()) {
     cmovl(cc, dst, src);
@@ -8485,20 +9048,20 @@
     if (Universe::narrow_oop_shift() != 0) {
       assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
       if (LogMinObjAlignmentInBytes == Address::times_8) {
-        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
       } else {
         // OK to use shift since we don't need to preserve flags.
         shlq(dst, LogMinObjAlignmentInBytes);
-        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset()));
       }
     } else {
-      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      movq(dst, Address(dst, Klass::prototype_header_offset()));
     }
   } else
 #endif
   {
     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    movptr(dst, Address(dst, Klass::prototype_header_offset()));
   }
 }
 
@@ -8753,12 +9316,87 @@
 }
 #endif // _LP64
 
+
+// C2 compiled method's prolog code.
+//
+// framesize   - frame size in bytes; must be a multiple of
+//               StackAlignmentInBytes. It is reduced by one word for the
+//               return address before use.
+// stack_bang  - when true, generate_stack_overflow_check() is emitted and
+//               rbp is pushed; when false, the frame is allocated first
+//               with a forced 32-bit immediate and rbp is stored into it.
+// fp_mode_24b - (non-LP64 builds only) load the FPU control word from
+//               StubRoutines::addr_fpu_cntrl_wrd_24().
+void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
+
+  // WARNING: Initial instruction MUST be 5 bytes or longer so that
+  // NativeJump::patch_verified_entry will be able to patch out the entry
+  // code safely. The push to verify stack depth is ok at 5 bytes,
+  // the frame allocation can be either 3 or 6 bytes. So if we don't do
+  // stack bang then we must use the 6 byte frame allocation even if
+  // we have no frame. :-(
+
+  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+  // Remove word for return addr
+  framesize -= wordSize;
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them.  But be careful, because
+  // some VM calls (such as call site linkage) can use several kilobytes of
+  // stack.  But the stack safety zone should account for that.
+  // See bugs 4446381, 4468289, 4497237.
+  if (stack_bang) {
+    generate_stack_overflow_check(framesize);
+
+    // We always push rbp, so that on return to interpreter rbp, will be
+    // restored correctly and we can correct the stack.
+    push(rbp);
+    // Remove word for ebp
+    framesize -= wordSize;
+
+    // Create frame
+    if (framesize) {
+      subptr(rsp, framesize);
+    }
+  } else {
+    // Create frame (force generation of a 4 byte immediate value)
+    subptr_imm32(rsp, framesize);
+
+    // Save RBP register now.
+    // It goes into the topmost word of the frame that was just allocated.
+    framesize -= wordSize;
+    movptr(Address(rsp, framesize), rbp);
+  }
+
+  if (VerifyStackAtCalls) { // Magic cookie to verify stack depth
+    // Stored one word below the saved rbp slot.
+    framesize -= wordSize;
+    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
+  }
+
+#ifndef _LP64
+  // If method sets FPU control word do it now
+  if (fp_mode_24b) {
+    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+  }
+  if (UseSSE >= 2 && VerifyFPU) {
+    verify_FPU(0, "FPU stack must be clean on entry");
+  }
+#endif
+
+#ifdef ASSERT
+  if (VerifyStackAtCalls) {
+    // Debug check: after pushing rax, rsp modulo StackAlignmentInBytes must
+    // equal StackAlignmentInBytes-wordSize; otherwise stop().
+    Label L;
+    push(rax);
+    mov(rax, rsp);
+    andptr(rax, StackAlignmentInBytes-1);
+    cmpptr(rax, StackAlignmentInBytes-wordSize);
+    pop(rax);
+    jcc(Assembler::equal, L);
+    stop("Stack is not properly aligned!");
+    bind(L);
+  }
+#endif
+
+}
+
+
 // IndexOf for constant substrings with size >= 8 chars
 // which don't need to be loaded through stack.
 void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                       Register cnt1, Register cnt2,
                                       int int_cnt2,  Register result,
                                       XMMRegister vec, Register tmp) {
+  ShortBranchVerifier sbv(this);
   assert(UseSSE42Intrinsics, "SSE4.2 is required");
 
   // This method uses pcmpestri inxtruction with bound registers
@@ -8888,9 +9526,9 @@
       pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
     }
     // Need to reload strings pointers if not matched whole vector
-    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
     addptr(cnt2, 8);
-    jccb(Assembler::negative, SCAN_SUBSTR);
+    jcc(Assembler::negative, SCAN_SUBSTR);
     // Fall through if found full substring
 
   } // (int_cnt2 > 8)
@@ -8909,6 +9547,7 @@
                                     Register cnt1, Register cnt2,
                                     int int_cnt2,  Register result,
                                     XMMRegister vec, Register tmp) {
+  ShortBranchVerifier sbv(this);
   assert(UseSSE42Intrinsics, "SSE4.2 is required");
   //
   // int_cnt2 is length of small (< 8 chars) constant substring
@@ -9170,6 +9809,7 @@
 void MacroAssembler::string_compare(Register str1, Register str2,
                                     Register cnt1, Register cnt2, Register result,
                                     XMMRegister vec1) {
+  ShortBranchVerifier sbv(this);
   Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
 
   // Compute the minimum of the string lengths and the
@@ -9306,6 +9946,7 @@
 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                         Register limit, Register result, Register chr,
                                         XMMRegister vec1, XMMRegister vec2) {
+  ShortBranchVerifier sbv(this);
   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
 
   int length_offset  = arrayOopDesc::length_offset_in_bytes();
@@ -9425,6 +10066,7 @@
 void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                    Register to, Register value, Register count,
                                    Register rtmp, XMMRegister xtmp) {
+  ShortBranchVerifier sbv(this);
   assert_different_registers(to, value, count, rtmp);
   Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
   Label L_fill_2_bytes, L_fill_4_bytes;
--- a/src/cpu/x86/vm/assembler_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -503,7 +503,31 @@
     REX_WR     = 0x4C,
     REX_WRB    = 0x4D,
     REX_WRX    = 0x4E,
-    REX_WRXB   = 0x4F
+    REX_WRXB   = 0x4F,
+
+    VEX_3bytes = 0xC4,
+    VEX_2bytes = 0xC5
+  };
+
+  // NOTE(review): presumably bit masks for the R, X, B and W fields of the
+  // VEX prefix bytes. VEX_R and VEX_W share the value 0x80, so they are
+  // presumably applied to different prefix bytes -- confirm against the
+  // vex_prefix() implementation.
+  enum VexPrefix {
+    VEX_B = 0x20,
+    VEX_X = 0x40,
+    VEX_R = 0x80,
+    VEX_W = 0x80
+  };
+
+  // Selects the SIMD prefix of an instruction (none, 66, F3 or F2). The
+  // simd_prefix*() helpers take this value in place of an explicitly
+  // emitted prefix byte.
+  enum VexSimdPrefix {
+    VEX_SIMD_NONE = 0x0,
+    VEX_SIMD_66   = 0x1,
+    VEX_SIMD_F3   = 0x2,
+    VEX_SIMD_F2   = 0x3
+  };
+
+  // Selects the opcode map of an instruction (none, 0F, 0F 38 or 0F 3A).
+  // VEX_OPCODE_0F is the default used by the simd_prefix*() helpers.
+  enum VexOpcode {
+    VEX_OPCODE_NONE  = 0x0,
+    VEX_OPCODE_0F    = 0x1,
+    VEX_OPCODE_0F_38 = 0x2,
+    VEX_OPCODE_0F_3A = 0x3
   };
 
   enum WhichOperand {
@@ -546,16 +570,105 @@
   void prefixq(Address adr);
 
   void prefix(Address adr, Register reg,  bool byteinst = false);
+  void prefix(Address adr, XMMRegister reg);
   void prefixq(Address adr, Register reg);
-
-  void prefix(Address adr, XMMRegister reg);
+  void prefixq(Address adr, XMMRegister reg);
 
   void prefetch_prefix(Address src);
 
+  void rex_prefix(Address adr, XMMRegister xreg,
+                  VexSimdPrefix pre, VexOpcode opc, bool rex_w);
+  int  rex_prefix_and_encode(int dst_enc, int src_enc,
+                             VexSimdPrefix pre, VexOpcode opc, bool rex_w);
+
+  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
+                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
+                  bool vector256);
+
+  void vex_prefix(Address adr, int nds_enc, int xreg_enc,
+                  VexSimdPrefix pre, VexOpcode opc,
+                  bool vex_w, bool vector256);
+
+  // Emit a VEX prefix (0F opcode map, vex_w clear) for dst, nds and the
+  // memory operand src. An invalid nds (e.g. xnoreg) is passed on as
+  // encoding 0, the same convention simd_prefix() uses, instead of asking
+  // an invalid register for its encoding.
+  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
+                  VexSimdPrefix pre, bool vector256 = false) {
+    int dst_enc = dst->encoding();
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
+  }
+
+  int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
+                             VexSimdPrefix pre, VexOpcode opc,
+                             bool vex_w, bool vector256);
+
+  // Register-register VEX prefix (0F opcode map, vex_w clear); returns the
+  // value produced by the int-encoding overload. An invalid nds (e.g.
+  // xnoreg) is passed on as encoding 0, the same convention
+  // simd_prefix_and_encode() uses, instead of asking an invalid register
+  // for its encoding.
+  int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
+                             VexSimdPrefix pre, bool vector256 = false) {
+     int dst_enc = dst->encoding();
+     int src_enc = src->encoding();
+     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc,
+                                  pre, VEX_OPCODE_0F, false, vector256);
+  }
+
+  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
+                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
+                   bool rex_w = false, bool vector256 = false);
+
+  // Two-operand convenience form: forwards with no nds operand (xnoreg).
+  void simd_prefix(XMMRegister dst, Address src,
+                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+    simd_prefix(dst, xnoreg, src, pre, opc);
+  }
+  // Memory-destination form: forwards to the (XMMRegister, Address)
+  // overload with the arguments swapped.
+  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
+    simd_prefix(src, dst, pre);
+  }
+  // Same as simd_prefix() on the 0F opcode map, but with rex_w set.
+  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
+                     VexSimdPrefix pre) {
+    simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, /* rex_w */ true);
+  }
+
+
+  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
+                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
+                             bool rex_w = false, bool vector256 = false);
+
+  // Two-operand convenience form: forwards with no nds operand (xnoreg).
+  int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
+                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+    return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
+  }
+
+  // Move/convert 32-bit integer value.
+  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
+                             VexSimdPrefix pre) {
+    // Only the register encoding is consumed by simd_prefix_and_encode(),
+    // and general and XMM registers come in equal numbers, so viewing the
+    // general register as an XMM register is fine for this call.
+    XMMRegister src_as_xmm = as_XMMRegister(src->encoding());
+    return simd_prefix_and_encode(dst, nds, src_as_xmm, pre);
+  }
+  // General-register source without an nds operand (xnoreg is forwarded).
+  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
+    return simd_prefix_and_encode(dst, xnoreg, src, pre);
+  }
+  // General-register destination: its encoding is viewed as an XMM register
+  // and no nds operand (xnoreg) is forwarded.
+  int simd_prefix_and_encode(Register dst, XMMRegister src,
+                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+    XMMRegister dst_as_xmm = as_XMMRegister(dst->encoding());
+    return simd_prefix_and_encode(dst_as_xmm, xnoreg, src, pre, opc);
+  }
+
+  // Move/convert 64-bit integer value.
+  // General-register source viewed as an XMM register; 0F opcode map with
+  // rex_w set.
+  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
+                               VexSimdPrefix pre) {
+    XMMRegister src_as_xmm = as_XMMRegister(src->encoding());
+    return simd_prefix_and_encode(dst, nds, src_as_xmm, pre, VEX_OPCODE_0F, /* rex_w */ true);
+  }
+  // General-register source without an nds operand (xnoreg is forwarded).
+  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
+    return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
+  }
+  // General-register destination viewed as an XMM register; no nds operand
+  // (xnoreg) and rex_w set.
+  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
+                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+    XMMRegister dst_as_xmm = as_XMMRegister(dst->encoding());
+    return simd_prefix_and_encode(dst_as_xmm, xnoreg, src, pre, opc, /* rex_w */ true);
+  }
+
   // Helper functions for groups of instructions
   void emit_arith_b(int op1, int op2, Register dst, int imm8);
 
   void emit_arith(int op1, int op2, Register dst, int32_t imm32);
+  // Force generation of a 4 byte immediate value even if it fits into 8bit
+  void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
   // only 32bit??
   void emit_arith(int op1, int op2, Register dst, jobject obj);
   void emit_arith(int op1, int op2, Register dst, Register src);
@@ -764,6 +877,7 @@
   void addss(XMMRegister dst, Address src);
   void addss(XMMRegister dst, XMMRegister src);
 
+  void andl(Address  dst, int32_t imm32);
   void andl(Register dst, int32_t imm32);
   void andl(Register dst, Address src);
   void andl(Register dst, Register src);
@@ -774,9 +888,11 @@
   void andq(Register dst, Register src);
 
   // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
-  void andpd(XMMRegister dst, Address src);
   void andpd(XMMRegister dst, XMMRegister src);
 
+  // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
+  void andps(XMMRegister dst, XMMRegister src);
+
   void bsfl(Register dst, Register src);
   void bsrl(Register dst, Register src);
 
@@ -837,9 +953,11 @@
 
   // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
   void comisd(XMMRegister dst, Address src);
+  void comisd(XMMRegister dst, XMMRegister src);
 
   // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
   void comiss(XMMRegister dst, Address src);
+  void comiss(XMMRegister dst, XMMRegister src);
 
   // Identify processor type and features
   void cpuid() {
@@ -849,14 +967,19 @@
 
   // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
   void cvtsd2ss(XMMRegister dst, XMMRegister src);
+  void cvtsd2ss(XMMRegister dst, Address src);
 
   // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
   void cvtsi2sdl(XMMRegister dst, Register src);
+  void cvtsi2sdl(XMMRegister dst, Address src);
   void cvtsi2sdq(XMMRegister dst, Register src);
+  void cvtsi2sdq(XMMRegister dst, Address src);
 
   // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
   void cvtsi2ssl(XMMRegister dst, Register src);
+  void cvtsi2ssl(XMMRegister dst, Address src);
   void cvtsi2ssq(XMMRegister dst, Register src);
+  void cvtsi2ssq(XMMRegister dst, Address src);
 
   // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
   void cvtdq2pd(XMMRegister dst, XMMRegister src);
@@ -866,6 +989,7 @@
 
   // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
   void cvtss2sd(XMMRegister dst, XMMRegister src);
+  void cvtss2sd(XMMRegister dst, Address src);
 
   // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
   void cvttsd2sil(Register dst, Address src);
@@ -1140,8 +1264,6 @@
   void movdq(Register dst, XMMRegister src);
 
   // Move Aligned Double Quadword
-  void movdqa(Address     dst, XMMRegister src);
-  void movdqa(XMMRegister dst, Address src);
   void movdqa(XMMRegister dst, XMMRegister src);
 
   // Move Unaligned Double Quadword
@@ -1261,10 +1383,18 @@
   void orq(Register dst, Address src);
   void orq(Register dst, Register src);
 
+  // Pack with unsigned saturation
+  void packuswb(XMMRegister dst, XMMRegister src);
+  void packuswb(XMMRegister dst, Address src);
+
   // SSE4.2 string instructions
   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
 
+  // SSE4.1 packed move
+  void pmovzxbw(XMMRegister dst, XMMRegister src);
+  void pmovzxbw(XMMRegister dst, Address src);
+
 #ifndef _LP64 // no 32bit push/pop on amd64
   void popl(Address dst);
 #endif
@@ -1292,6 +1422,7 @@
 
   // POR - Bitwise logical OR
   void por(XMMRegister dst, XMMRegister src);
+  void por(XMMRegister dst, Address src);
 
   // Shuffle Packed Doublewords
   void pshufd(XMMRegister dst, XMMRegister src, int mode);
@@ -1313,6 +1444,11 @@
 
   // Interleave Low Bytes
   void punpcklbw(XMMRegister dst, XMMRegister src);
+  void punpcklbw(XMMRegister dst, Address src);
+
+  // Interleave Low Doublewords
+  void punpckldq(XMMRegister dst, XMMRegister src);
+  void punpckldq(XMMRegister dst, Address src);
 
 #ifndef _LP64 // no 32bit push/pop on amd64
   void pushl(Address src);
@@ -1392,6 +1528,9 @@
   void subq(Register dst, Address src);
   void subq(Register dst, Register src);
 
+  // Force generation of a 4 byte immediate value even if it fits into 8bit
+  void subl_imm32(Register dst, int32_t imm32);
+  void subq_imm32(Register dst, int32_t imm32);
 
   // Subtract Scalar Double-Precision Floating-Point Values
   void subsd(XMMRegister dst, Address src);
@@ -1429,6 +1568,13 @@
   void xchgq(Register reg, Address adr);
   void xchgq(Register dst, Register src);
 
+  // Get Value of Extended Control Register (XGETBV): emits the bytes 0F 01 D0.
+  void xgetbv() {
+    emit_byte(0x0F);
+    emit_byte(0x01);
+    emit_byte(0xD0);
+  }
+
   void xorl(Register dst, int32_t imm32);
   void xorl(Register dst, Address src);
   void xorl(Register dst, Register src);
@@ -1437,14 +1583,44 @@
   void xorq(Register dst, Register src);
 
   // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, Address src);
   void xorpd(XMMRegister dst, XMMRegister src);
 
   // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, Address src);
   void xorps(XMMRegister dst, XMMRegister src);
 
   void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
+
+  // AVX 3-operand instructions (encoded with VEX prefix)
+  void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
+  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vaddss(XMMRegister dst, XMMRegister nds, Address src);
+  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src);
+  void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
+  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vdivss(XMMRegister dst, XMMRegister nds, Address src);
+  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
+  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vmulss(XMMRegister dst, XMMRegister nds, Address src);
+  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
+  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vsubss(XMMRegister dst, XMMRegister nds, Address src);
+  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
+
+
+ protected:
+  // The following instructions require 16-byte address alignment in SSE mode.
+  // They should be called only from the corresponding MacroAssembler instructions.
+  void andpd(XMMRegister dst, Address src);
+  void andps(XMMRegister dst, Address src);
+  void xorpd(XMMRegister dst, Address src);
+  void xorps(XMMRegister dst, Address src);
+
 };
 
 
@@ -1592,8 +1768,8 @@
   // Alignment
   void align(int modulus);
 
-  // Misc
-  void fat_nop(); // 5 byte nop
+  // A 5 byte nop that is safe for patching (see patch_verified_entry)
+  void fat_nop();
 
   // Stack frame creation/removal
   void enter();
@@ -2104,6 +2280,8 @@
 
   void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
   void subptr(Register dst, int32_t src);
+  // Force generation of a 4 byte immediate value even if it fits into 8bit
+  void subptr_imm32(Register dst, int32_t src);
   void subptr(Register dst, Register src);
   void subptr(Register dst, RegisterOrConstant src) {
     if (src.is_constant()) subptr(dst, (int) src.as_constant());
@@ -2175,9 +2353,15 @@
   void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
   void andpd(XMMRegister dst, AddressLiteral src);
 
+  void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
+  void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
+  void andps(XMMRegister dst, AddressLiteral src);
+
+  void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
   void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
   void comiss(XMMRegister dst, AddressLiteral src);
 
+  void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
   void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
   void comisd(XMMRegister dst, AddressLiteral src);
 
@@ -2211,62 +2395,62 @@
   void movss(XMMRegister dst, Address src)     { Assembler::movss(dst, src); }
   void movss(XMMRegister dst, AddressLiteral src);
 
-  void movlpd(XMMRegister dst, Address src)      {Assembler::movlpd(dst, src); }
+  void movlpd(XMMRegister dst, Address src)    {Assembler::movlpd(dst, src); }
   void movlpd(XMMRegister dst, AddressLiteral src);
 
 public:
 
   void addsd(XMMRegister dst, XMMRegister src)    { Assembler::addsd(dst, src); }
   void addsd(XMMRegister dst, Address src)        { Assembler::addsd(dst, src); }
-  void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); }
+  void addsd(XMMRegister dst, AddressLiteral src);
 
   void addss(XMMRegister dst, XMMRegister src)    { Assembler::addss(dst, src); }
   void addss(XMMRegister dst, Address src)        { Assembler::addss(dst, src); }
-  void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); }
+  void addss(XMMRegister dst, AddressLiteral src);
 
   void divsd(XMMRegister dst, XMMRegister src)    { Assembler::divsd(dst, src); }
   void divsd(XMMRegister dst, Address src)        { Assembler::divsd(dst, src); }
-  void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); }
+  void divsd(XMMRegister dst, AddressLiteral src);
 
   void divss(XMMRegister dst, XMMRegister src)    { Assembler::divss(dst, src); }
   void divss(XMMRegister dst, Address src)        { Assembler::divss(dst, src); }
-  void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); }
+  void divss(XMMRegister dst, AddressLiteral src);
 
   void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
   void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
   void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
-  void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); }
+  void movsd(XMMRegister dst, AddressLiteral src);
 
   void mulsd(XMMRegister dst, XMMRegister src)    { Assembler::mulsd(dst, src); }
   void mulsd(XMMRegister dst, Address src)        { Assembler::mulsd(dst, src); }
-  void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); }
+  void mulsd(XMMRegister dst, AddressLiteral src);
 
   void mulss(XMMRegister dst, XMMRegister src)    { Assembler::mulss(dst, src); }
   void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
-  void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); }
+  void mulss(XMMRegister dst, AddressLiteral src);
 
   void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
-  void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); }
+  void sqrtsd(XMMRegister dst, AddressLiteral src);
 
   void sqrtss(XMMRegister dst, XMMRegister src)    { Assembler::sqrtss(dst, src); }
   void sqrtss(XMMRegister dst, Address src)        { Assembler::sqrtss(dst, src); }
-  void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); }
+  void sqrtss(XMMRegister dst, AddressLiteral src);
 
   void subsd(XMMRegister dst, XMMRegister src)    { Assembler::subsd(dst, src); }
   void subsd(XMMRegister dst, Address src)        { Assembler::subsd(dst, src); }
-  void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); }
+  void subsd(XMMRegister dst, AddressLiteral src);
 
   void subss(XMMRegister dst, XMMRegister src)    { Assembler::subss(dst, src); }
   void subss(XMMRegister dst, Address src)        { Assembler::subss(dst, src); }
-  void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); }
+  void subss(XMMRegister dst, AddressLiteral src);
 
   void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
-  void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
+  void ucomiss(XMMRegister dst, Address src)     { Assembler::ucomiss(dst, src); }
   void ucomiss(XMMRegister dst, AddressLiteral src);
 
   void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
-  void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
+  void ucomisd(XMMRegister dst, Address src)     { Assembler::ucomisd(dst, src); }
   void ucomisd(XMMRegister dst, AddressLiteral src);
 
   // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
@@ -2279,6 +2463,53 @@
   void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
   void xorps(XMMRegister dst, AddressLiteral src);
 
+  // AVX 3-operand instructions
+
+  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
+  void vaddsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddsd(dst, nds, src); }
+  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
+  void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
+  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandpd(dst, nds, src); }
+  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vandps(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandps(dst, nds, src); }
+  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
+  void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
+  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
+  void vdivss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivss(dst, nds, src); }
+  void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
+  void vmulsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulsd(dst, nds, src); }
+  void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
+  void vmulss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulss(dst, nds, src); }
+  void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
+  void vsubsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubsd(dst, nds, src); }
+  void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
+  void vsubss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubss(dst, nds, src); }
+  void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
+  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
+  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+
+
   // Data
 
   void cmov32( Condition cc, Register dst, Address  src);
@@ -2342,6 +2573,9 @@
   void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
   void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
 
+  // C2 compiled method's prolog code.
+  void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b);
+
   // IndexOf strings.
   // Small strings are loaded through stack if they cross page boundary.
   void string_indexof(Register str1, Register str2,
--- a/src/cpu/x86/vm/assembler_x86.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/assembler_x86.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -86,6 +86,7 @@
 inline void Assembler::prefixq(Address adr, Register reg) {}
 
 inline void Assembler::prefix(Address adr, XMMRegister reg) {}
+inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
 #else
 inline void Assembler::emit_long64(jlong x) {
   *(jlong*) _code_pos = x;
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -320,7 +320,7 @@
     // begin_initialized_entry_offset has to fit in a byte. Also, we know it's not null.
     __ load_heap_oop_not_null(tmp2, Address(_obj, java_lang_Class::klass_offset_in_bytes()));
     __ get_thread(tmp);
-    __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc)));
+    __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset()));
     __ pop(tmp2);
     __ pop(tmp);
     __ jcc(Assembler::notEqual, call_patch);
@@ -519,8 +519,8 @@
 
   __ load_klass(tmp_reg, src_reg);
 
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
-  __ cmpl(ref_type_adr, REF_NONE);
+  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
+  __ cmpb(ref_type_adr, REF_NONE);
   __ jcc(Assembler::equal, _continuation);
 
   // Is marking active?
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -406,7 +406,7 @@
   // search an exception handler (rax: exception oop, rdx: throwing pc)
   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
   __ should_not_reach_here();
-  assert(code_offset() - offset <= exception_handler_size, "overflow");
+  guarantee(code_offset() - offset <= exception_handler_size, "overflow");
   __ end_a_stub();
 
   return offset;
@@ -490,8 +490,7 @@
 
   __ pushptr(here.addr());
   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
-
-  assert(code_offset() - offset <= deopt_handler_size, "overflow");
+  guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
   __ end_a_stub();
 
   return offset;
@@ -1557,8 +1556,8 @@
 
 void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
   if (op->init_check()) {
-    __ cmpl(Address(op->klass()->as_register(),
-                    instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)),
+    __ cmpb(Address(op->klass()->as_register(),
+                    instanceKlass::init_state_offset()),
             instanceKlass::fully_initialized);
     add_debug_info_for_null_check_here(op->stub()->info());
     __ jcc(Assembler::notEqual, *op->stub()->entry());
@@ -1730,7 +1729,7 @@
 #else
       __ cmpoop(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding());
 #endif // _LP64
-      if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
+      if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
         __ jcc(Assembler::notEqual, *failure_target);
         // successful cast, fall through to profile or jump
       } else {
@@ -1842,7 +1841,7 @@
     __ load_klass(klass_RInfo, value);
 
     // get instance klass (it's already uncompressed)
-    __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
+    __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset()));
     // perform the fast part of the checking logic
     __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
     // call out-of-line instance of __ check_klass_subtype_slow_path(...):
@@ -3289,8 +3288,7 @@
           } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
             __ load_klass(tmp, dst);
           }
-          int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-            Klass::layout_helper_offset_in_bytes();
+          int lh_offset = in_bytes(Klass::layout_helper_offset());
           Address klass_lh_addr(tmp, lh_offset);
           jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
           __ cmpl(klass_lh_addr, objArray_lh);
@@ -3307,9 +3305,9 @@
 
 #ifndef _LP64
         __ movptr(tmp, dst_klass_addr);
-        __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
+        __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset()));
         __ push(tmp);
-        __ movl(tmp, Address(tmp, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
+        __ movl(tmp, Address(tmp, Klass::super_check_offset_offset()));
         __ push(tmp);
         __ push(length);
         __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
@@ -3333,15 +3331,15 @@
         // Allocate abi space for args but be sure to keep stack aligned
         __ subptr(rsp, 6*wordSize);
         __ load_klass(c_rarg3, dst);
-        __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
+        __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset()));
         store_parameter(c_rarg3, 4);
-        __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
+        __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset()));
         __ call(RuntimeAddress(copyfunc_addr));
         __ addptr(rsp, 6*wordSize);
 #else
         __ load_klass(c_rarg4, dst);
-        __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
-        __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
+        __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset()));
+        __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
         __ call(RuntimeAddress(copyfunc_addr));
 #endif
 
@@ -3715,6 +3713,25 @@
   // __ store_fence();
 }
 
+void LIR_Assembler::membar_loadload() {
+  // Intentionally a no-op: no code is emitted for a LoadLoad barrier here.
+  // Disabled: __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad));
+}
+
+void LIR_Assembler::membar_storestore() {
+  // Intentionally a no-op: no code is emitted for a StoreStore barrier here.
+  // Disabled: __ membar(Assembler::Membar_mask_bits(Assembler::StoreStore));
+}
+
+void LIR_Assembler::membar_loadstore() {
+  // Intentionally a no-op: no code is emitted for a LoadStore barrier here.
+  // Disabled: __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore));
+}
+
+void LIR_Assembler::membar_storeload() {
+  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); // the only membar_* variant here that emits a barrier
+}
+
 void LIR_Assembler::get_thread(LIR_Opr result_reg) {
   assert(result_reg->is_register(), "check");
 #ifdef _LP64
--- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -156,7 +156,7 @@
   assert_different_registers(obj, klass, len);
   if (UseBiasedLocking && !len->is_valid()) {
     assert_different_registers(obj, klass, len, t1, t2);
-    movptr(t1, Address(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    movptr(t1, Address(klass, Klass::prototype_header_offset()));
     movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1);
   } else {
     // This assumes that all prototype bits fit in an int32_t
@@ -289,12 +289,14 @@
       jcc(Assembler::notZero, loop);
     }
   }
-  
-  // (tw) fix me
-//  if (CURRENT_ENV->dtrace_alloc_probes()) {
-//    assert(obj == rax, "must be");
-//    call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
-//  }
+
+#ifndef GRAAL
+  // TODO(tw): Check how we can access the flag without a ciEnv object.
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    assert(obj == rax, "must be");
+    call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+  }
+#endif
 
   verify_oop(obj);
 }
@@ -324,11 +326,14 @@
   const Register len_zero = len;
   initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
 
-  // TODO(tw): Re-enable this code once Graal no longer uses this method.
-//  if (CURRENT_ENV->dtrace_alloc_probes()) {
-//    assert(obj == rax, "must be");
-//    call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
-//  }
+
+#ifndef GRAAL
+  // TODO(tw): Check how we can access the flag without a ciEnv object.
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    assert(obj == rax, "must be");
+    call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+  }
+#endif
 
   verify_oop(obj);
 }
@@ -389,6 +394,16 @@
 
 
 void C1_MacroAssembler::verified_entry() {
+  if (C1Breakpoint || VerifyFPU || !UseStackBanging) {
+    // Verified Entry first instruction should be 5 bytes long for correct
+    // patching by patch_verified_entry().
+    //
+    // C1Breakpoint and VerifyFPU have one byte first instruction.
+    // Also first instruction will be one byte "push(rbp)" if stack banging
+    // code is not generated (see build_frame() above).
+    // For all these cases generate long instruction first.
+    fat_nop();
+  }
   if (C1Breakpoint)int3();
   // build frame
   verify_FPU(0, "method_entry");
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -594,47 +594,46 @@
 OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
   OopMapSet* oop_maps = new OopMapSet();
 #ifdef GRAAL
-    // graal passes the argument in r10
-    OopMap* oop_map = save_live_registers(sasm, 1);
+  OopMap* oop_map = save_live_registers(sasm, 1);
 
-    // now all registers are saved and can be used freely
-    // verify that no old value is used accidentally
-    __ invalidate_registers(true, true, true, true, true, true);
+  // now all registers are saved and can be used freely
+  // verify that no old value is used accidentally
+  __ invalidate_registers(true, true, true, true, true, true);
 
-    // registers used by this stub
-    const Register temp_reg = rbx;
+  // registers used by this stub
+  const Register temp_reg = rbx;
 
-    // load argument for exception that is passed as an argument into the stub
-    if (has_argument) {
-      __ movptr(c_rarg1, r10);
-    }
-    int call_offset = __ call_RT(noreg, noreg, target, has_argument ? 1 : 0);
+  // load argument for exception that is passed as an argument into the stub
+  if (has_argument) {
+    __ movptr(c_rarg1, r10);
+  }
+  int call_offset = __ call_RT(noreg, noreg, target, has_argument ? 1 : 0);
 
-    oop_maps->add_gc_map(call_offset, oop_map);
+  oop_maps->add_gc_map(call_offset, oop_map);
 #else
-    // preserve all registers
-    int num_rt_args = has_argument ? 2 : 1;
-    OopMap* oop_map = save_live_registers(sasm, num_rt_args);
+  // preserve all registers
+  int num_rt_args = has_argument ? 2 : 1;
+  OopMap* oop_map = save_live_registers(sasm, num_rt_args);
 
-    // now all registers are saved and can be used freely
-    // verify that no old value is used accidentally
-    __ invalidate_registers(true, true, true, true, true, true);
+  // now all registers are saved and can be used freely
+  // verify that no old value is used accidentally
+  __ invalidate_registers(true, true, true, true, true, true);
 
-    // registers used by this stub
-    const Register temp_reg = rbx;
+  // registers used by this stub
+  const Register temp_reg = rbx;
 
-    // load argument for exception that is passed as an argument into the stub
-    if (has_argument) {
-  #ifdef _LP64
-      __ movptr(c_rarg1, Address(rbp, 2*BytesPerWord));
-  #else
-      __ movptr(temp_reg, Address(rbp, 2*BytesPerWord));
-      __ push(temp_reg);
-  #endif // _LP64
-    }
-    int call_offset = __ call_RT(noreg, noreg, target, num_rt_args - 1);
+  // load argument for exception that is passed as an argument into the stub
+  if (has_argument) {
+#ifdef _LP64
+    __ movptr(c_rarg1, Address(rbp, 2*BytesPerWord));
+#else
+    __ movptr(temp_reg, Address(rbp, 2*BytesPerWord));
+    __ push(temp_reg);
+#endif // _LP64
+  }
+  int call_offset = __ call_RT(noreg, noreg, target, num_rt_args - 1);
 
-    oop_maps->add_gc_map(call_offset, oop_map);
+  oop_maps->add_gc_map(call_offset, oop_map);
 #endif
 
   __ stop("should not reach here");
@@ -980,6 +979,7 @@
   return oop_maps;
 }
 
+#ifdef GRAAL
 JRT_ENTRY(void, graal_create_null_exception(JavaThread* thread))
   thread->set_vm_result(Exceptions::new_exception(thread, vmSymbols::java_lang_NullPointerException(), NULL)());
 JRT_END
@@ -1008,9 +1008,7 @@
 
   thread->set_vm_result((oop) result.get_jobject());
 JRT_END
-
-
-
+#endif
 
 OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
 
@@ -1061,7 +1059,7 @@
 
           if (id == fast_new_instance_init_check_id) {
             // make sure the klass is initialized
-            __ cmpl(Address(klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized);
+            __ cmpb(Address(klass, instanceKlass::init_state_offset()), instanceKlass::fully_initialized);
             __ jcc(Assembler::notEqual, slow_path);
           }
 
@@ -1069,7 +1067,7 @@
           // assert object can be fast path allocated
           {
             Label ok, not_ok;
-            __ movl(obj_size, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+            __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
             __ cmpl(obj_size, 0);  // make sure it's an instance (LH > 0)
             __ jcc(Assembler::lessEqual, not_ok);
             __ testl(obj_size, Klass::_lh_instance_slow_path_bit);
@@ -1090,7 +1088,7 @@
           __ bind(retry_tlab);
 
           // get the instance size (size is postive so movl is fine for 64bit)
-          __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+          __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
 
           __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
 
@@ -1102,7 +1100,7 @@
 
           __ bind(try_eden);
           // get the instance size (size is postive so movl is fine for 64bit)
-          __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+          __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
 
           __ eden_allocate(obj, obj_size, 0, t1, slow_path);
           __ incr_allocated_bytes(thread, obj_size, 0);
@@ -1169,7 +1167,7 @@
         {
           Label ok;
           Register t0 = obj;
-          __ movl(t0, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+          __ movl(t0, Address(klass, Klass::layout_helper_offset()));
           __ sarl(t0, Klass::_lh_array_tag_shift);
           int tag = ((id == new_type_array_id)
                      ? Klass::_lh_array_tag_type_value
@@ -1203,7 +1201,7 @@
 
           // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
           // since size is positive movl does right thing on 64bit
-          __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+          __ movl(t1, Address(klass, Klass::layout_helper_offset()));
           // since size is postive movl does right thing on 64bit
           __ movl(arr_size, length);
           assert(t1 == rcx, "fixed register usage");
@@ -1217,7 +1215,7 @@
           __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path);  // preserves arr_size
 
           __ initialize_header(obj, klass, length, t1, t2);
-          __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte)));
+          __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
           assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
           assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
           __ andptr(t1, Klass::_lh_header_size_mask);
@@ -1230,7 +1228,7 @@
           __ bind(try_eden);
           // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
           // since size is positive movl does right thing on 64bit
-          __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+          __ movl(t1, Address(klass, Klass::layout_helper_offset()));
           // since size is postive movl does right thing on 64bit
           __ movl(arr_size, length);
           assert(t1 == rcx, "fixed register usage");
@@ -1245,7 +1243,7 @@
           __ incr_allocated_bytes(thread, arr_size, 0);
 
           __ initialize_header(obj, klass, length, t1, t2);
-          __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte)));
+          __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
           assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
           assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
           __ andptr(t1, Klass::_lh_header_size_mask);
@@ -1322,7 +1320,7 @@
         Label register_finalizer;
         Register t = rsi;
         __ load_klass(t, rax);
-        __ movl(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+        __ movl(t, Address(t, Klass::access_flags_offset()));
         __ testl(t, JVM_ACC_HAS_FINALIZER);
         __ jcc(Assembler::notZero, register_finalizer);
         __ ret(0);
@@ -1381,8 +1379,7 @@
       break;
 
     case unwind_exception_id:
-      {
-        __ set_info("unwind_exception", dont_gc_arguments);
+      { __ set_info("unwind_exception", dont_gc_arguments);
         // note: no stubframe since we are about to leave the current
         //       activation and we are calling a leaf VM function only.
         generate_unwind_exception(sasm);
@@ -1441,17 +1438,18 @@
         __ movptr(rsi, Address(rsp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass
         __ movptr(rax, Address(rsp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass
 
-        Label success;
         Label miss;
 #ifdef GRAAL
-          // TODO this should really be within the XirSnippets
+        Label success;
           __ check_klass_subtype_fast_path(rsi, rax, rcx, &success, &miss, NULL);
 #endif
 
         __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, NULL, &miss);
 
         // fallthrough on success:
+#ifdef GRAAL
         __ bind(success);
+#endif
         __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), 1); // result
         __ pop(rax);
         __ pop(rcx);
@@ -1850,7 +1848,7 @@
       }
       break;
 #endif // !SERIALGC
-
+#ifdef GRAAL
     case graal_unwind_exception_call_id: {
       // remove the frame from the stack
       __ movptr(rsp, rbp);
@@ -2053,9 +2051,7 @@
       __ ret(0);
       break;
     }
-
-
-
+#endif
 
     default:
       { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
--- a/src/cpu/x86/vm/c2_globals_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/c2_globals_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,7 +45,7 @@
 #else
 define_pd_global(bool, ProfileInterpreter,           true);
 #endif // CC_INTERP
-define_pd_global(bool, TieredCompilation,            true);
+define_pd_global(bool, TieredCompilation,            trueInTiered);
 define_pd_global(intx, CompileThreshold,             10000);
 define_pd_global(intx, BackEdgeThreshold,            100000);
 
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -511,7 +511,7 @@
     // get synchronization object
 
     Label done;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(rax, access_flags);
     __ testl(rax, JVM_ACC_STATIC);
     __ movptr(rax, Address(locals, 0));                   // get receiver (assume this is frequent case)
@@ -763,7 +763,7 @@
 #endif // ASSERT
   // get synchronization object
   { Label done;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(rax, access_flags);
     __ movptr(rdi, STATE(_locals));                                     // prepare to get receiver (assume common case)
     __ testl(rax, JVM_ACC_STATIC);
@@ -1180,7 +1180,7 @@
 
   // pass mirror handle if static call
   { Label L;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(t, Address(method, methodOopDesc::access_flags_offset()));
     __ testl(t, JVM_ACC_STATIC);
     __ jcc(Assembler::zero, L);
--- a/src/cpu/x86/vm/frame_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/frame_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #include "oops/markOop.hpp"
 #include "oops/methodOop.hpp"
 #include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
@@ -656,13 +657,15 @@
   return &interpreter_frame_tos_address()[index];
 }
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 #define DESCRIBE_FP_OFFSET(name) \
   values.describe(frame_no, fp() + frame::name##_offset, #name)
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
-  if (is_interpreted_frame()) {
+  if (is_ricochet_frame()) {
+    MethodHandles::RicochetFrame::describe(this, values, frame_no);
+  } else if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_method);
@@ -672,7 +675,6 @@
     DESCRIBE_FP_OFFSET(interpreter_frame_bcx);
     DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
   }
-
 }
 #endif
 
@@ -680,3 +682,21 @@
   // used to reset the saved FP
   return fp();
 }
+
+intptr_t* frame::real_fp() const {
+  if (_cb != NULL) {
+    // use the frame size if valid
+    int size = _cb->frame_size();
+    if ((size > 0) &&
+        (! is_ricochet_frame())) {
+      // Work-around: ricochet explicitly excluded because frame size is not
+      // constant for the ricochet blob but its frame_size could not, for
+      // some reason, be declared as <= 0. This potentially confusing
+      // size declaration should be fixed as another CR.
+      return unextended_sp() + size;
+    }
+  }
+  // else rely on fp()
+  assert(! is_compiled_frame(), "unknown compiled frame size");
+  return fp();
+}
--- a/src/cpu/x86/vm/frame_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/frame_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -188,6 +188,7 @@
   frame(intptr_t* sp, intptr_t* fp);
 
   // accessors for the instance variables
+  // Note: not necessarily the real 'frame pointer' (see real_fp)
   intptr_t*   fp() const { return _fp; }
 
   inline address* sender_pc_addr() const;
--- a/src/cpu/x86/vm/globals_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/globals_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -60,9 +60,9 @@
 #ifdef AMD64
 // Very large C++ stack frames using solaris-amd64 optimized builds
 // due to lack of optimization caused by C++ compiler bugs
-define_pd_global(intx, StackShadowPages, SOLARIS_ONLY(20) NOT_SOLARIS(6) DEBUG_ONLY(+2));
+define_pd_global(intx, StackShadowPages, NOT_WIN64(20) WIN64_ONLY(6) DEBUG_ONLY(+2));
 #else
-define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+5));
+define_pd_global(intx, StackShadowPages, 4 DEBUG_ONLY(+5));
 #endif // AMD64
 
 define_pd_global(intx, PreInflateSpin,           10);
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -279,14 +279,16 @@
 }
 
 // Emit code to verify that RBP is pointing at a valid ricochet frame.
-#ifdef ASSERT
+#ifndef PRODUCT
 enum {
   ARG_LIMIT = 255, SLOP = 4,
   // use this parameter for checking for garbage stack movements:
   UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
   // the slop defends against false alarms due to fencepost errors
 };
+#endif
 
+#ifdef ASSERT
 void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
   // The stack should look like this:
   //    ... keep1 | dest=42 | keep2 | RF | magic | handler | magic | recursive args |
@@ -990,53 +992,103 @@
   BLOCK_COMMENT("} move_return_value");
 }
 
+#ifndef PRODUCT
+#define DESCRIBE_RICOCHET_OFFSET(rf, name) \
+  values.describe(frame_no, (intptr_t *) (((uintptr_t)rf) + MethodHandles::RicochetFrame::name##_offset_in_bytes()), #name)
+
+void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
+    address bp = (address) fr->fp();
+    RicochetFrame* rf = (RicochetFrame*)(bp - sender_link_offset_in_bytes());
+
+    // ricochet slots
+    DESCRIBE_RICOCHET_OFFSET(rf, exact_sender_sp);
+    DESCRIBE_RICOCHET_OFFSET(rf, conversion);
+    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_base);
+    DESCRIBE_RICOCHET_OFFSET(rf, saved_args_layout);
+    DESCRIBE_RICOCHET_OFFSET(rf, saved_target);
+    DESCRIBE_RICOCHET_OFFSET(rf, continuation);
+
+    // relevant ricochet targets (in caller frame)
+    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
+}
+#endif // PRODUCT
 
 #ifndef PRODUCT
 extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
                               oop mh,
                               intptr_t* saved_regs,
-                              intptr_t* entry_sp,
-                              intptr_t* saved_sp,
-                              intptr_t* saved_bp) {
+                              intptr_t* entry_sp) {
   // called as a leaf from native code: do not block the JVM!
   bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have rcx_mh
-  intptr_t* last_sp = (intptr_t*) saved_bp[frame::interpreter_frame_last_sp_offset];
-  intptr_t* base_sp = last_sp;
-  typedef MethodHandles::RicochetFrame RicochetFrame;
-  RicochetFrame* rfp = (RicochetFrame*)((address)saved_bp - RicochetFrame::sender_link_offset_in_bytes());
-  if (!UseRicochetFrames || Universe::heap()->is_in((address) rfp->saved_args_base())) {
-    // Probably an interpreter frame.
-    base_sp = (intptr_t*) saved_bp[frame::interpreter_frame_monitor_block_top_offset];
-  }
-  intptr_t    mh_reg = (intptr_t)mh;
-  const char* mh_reg_name = "rcx_mh";
-  if (!has_mh)  mh_reg_name = "rcx";
-  tty->print_cr("MH %s %s="PTR_FORMAT" sp=("PTR_FORMAT"+"INTX_FORMAT") stack_size="INTX_FORMAT" bp="PTR_FORMAT,
-                adaptername, mh_reg_name, mh_reg,
-                (intptr_t)entry_sp, (intptr_t)(saved_sp - entry_sp), (intptr_t)(base_sp - last_sp), (intptr_t)saved_bp);
+  const char* mh_reg_name = has_mh ? "rcx_mh" : "rcx";
+  tty->print_cr("MH %s %s="PTR_FORMAT" sp="PTR_FORMAT, adaptername, mh_reg_name, mh, entry_sp);
+
   if (Verbose) {
-    tty->print(" reg dump: ");
-    int saved_regs_count = (entry_sp-1) - saved_regs;
-    // 32 bit: rdi rsi rbp rsp; rbx rdx rcx (*) rax
-    int i;
-    for (i = 0; i <= saved_regs_count; i++) {
-      if (i > 0 && i % 4 == 0 && i != saved_regs_count) {
+    tty->print_cr("Registers:");
+    const int saved_regs_count = RegisterImpl::number_of_registers;
+    for (int i = 0; i < saved_regs_count; i++) {
+      Register r = as_Register(i);
+      // The registers are stored in reverse order on the stack (by pusha).
+      tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]);
+      if ((i + 1) % 4 == 0) {
         tty->cr();
-        tty->print("   + dump: ");
+      } else {
+        tty->print(", ");
       }
-      tty->print(" %d: "PTR_FORMAT, i, saved_regs[i]);
     }
     tty->cr();
-    if (last_sp != saved_sp && last_sp != NULL)
-      tty->print_cr("*** last_sp="PTR_FORMAT, (intptr_t)last_sp);
-    int stack_dump_count = 16;
-    if (stack_dump_count < (int)(saved_bp + 2 - saved_sp))
-      stack_dump_count = (int)(saved_bp + 2 - saved_sp);
-    if (stack_dump_count > 64)  stack_dump_count = 48;
-    for (i = 0; i < stack_dump_count; i += 4) {
-      tty->print_cr(" dump at SP[%d] "PTR_FORMAT": "PTR_FORMAT" "PTR_FORMAT" "PTR_FORMAT" "PTR_FORMAT,
-                    i, (intptr_t) &entry_sp[i+0], entry_sp[i+0], entry_sp[i+1], entry_sp[i+2], entry_sp[i+3]);
+
+    {
+     // dumping last frame with frame::describe
+
+      JavaThread* p = JavaThread::active();
+
+      ResourceMark rm;
+      PRESERVE_EXCEPTION_MARK; // may not be needed but safer and inexpensive here
+      FrameValues values;
+
+      // Note: We want to allow trace_method_handle from any call site.
+      // While trace_method_handle creates a frame, it may be entered
+      // without a PC on the stack top (e.g. not just after a call).
+      // Walking that frame could lead to failures due to that invalid PC.
+      // => carefully detect that frame when doing the stack walking
+
+      // Current C frame
+      frame cur_frame = os::current_frame();
+
+      // Robust search of trace_calling_frame (independent of inlining).
+      // Assumes saved_regs comes from a pusha in the trace_calling_frame.
+      assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?");
+      frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame);
+      while (trace_calling_frame.fp() < saved_regs) {
+        trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
+      }
+
+      // safely create a frame and call frame::describe
+      intptr_t *dump_sp = trace_calling_frame.sender_sp();
+      intptr_t *dump_fp = trace_calling_frame.link();
+
+      bool walkable = has_mh; // whether the traced frame should be walkable
+
+      if (walkable) {
+        // The previous definition of walkable may have to be refined
+        // if new call sites cause the next frame constructor to start
+        // failing. Alternatively, frame constructors could be
+        // modified to support the current or future non walkable
+        // frames (but this is more intrusive and is not considered as
+        // part of this RFE, which will instead use a simpler output).
+        frame dump_frame = frame(dump_sp, dump_fp);
+        dump_frame.describe(values, 1);
+      } else {
+        // Stack may not be walkable (invalid PC above FP):
+        // Add descriptions without building a Java frame to avoid issues
+        values.describe(-1, dump_fp, "fp for #1 <not parsed, cannot trust pc>");
+        values.describe(-1, dump_sp, "sp for #1");
+      }
+
+      tty->print_cr("Stack layout:");
+      values.print(p);
     }
     if (has_mh)
       print_method_handle(mh);
@@ -1051,41 +1103,58 @@
   oopDesc* mh;
   intptr_t* saved_regs;
   intptr_t* entry_sp;
-  intptr_t* saved_sp;
-  intptr_t* saved_bp;
 };
 void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) {
   trace_method_handle_stub(args->adaptername,
                            args->mh,
                            args->saved_regs,
-                           args->entry_sp,
-                           args->saved_sp,
-                           args->saved_bp);
+                           args->entry_sp);
 }
 
 void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
   if (!TraceMethodHandles)  return;
   BLOCK_COMMENT("trace_method_handle {");
-  __ push(rax);
-  __ lea(rax, Address(rsp, wordSize * NOT_LP64(6) LP64_ONLY(14))); // entry_sp  __ pusha();
+  __ enter();
+  __ andptr(rsp, -16); // align stack if needed for FPU state
   __ pusha();
-  __ mov(rbx, rsp);
-  __ enter();
-  // incoming state:
+  __ mov(rbx, rsp); // for retrieving saved_regs
+  // Note: saved_regs must be in the entered frame for the
+  // robust stack walking implemented in trace_method_handle_stub.
+
+  // save FP result, valid at some call sites (adapter_opt_return_float, ...)
+  __ increment(rsp, -2 * wordSize);
+  if  (UseSSE >= 2) {
+    __ movdbl(Address(rsp, 0), xmm0);
+  } else if (UseSSE == 1) {
+    __ movflt(Address(rsp, 0), xmm0);
+  } else {
+    __ fst_d(Address(rsp, 0));
+  }
+
+  // Incoming state:
   // rcx: method handle
-  // r13 or rsi: saved sp
-  // To avoid calling convention issues, build a record on the stack and pass the pointer to that instead.
-  __ push(rbp);               // saved_bp
-  __ push(rsi);               // saved_sp
-  __ push(rax);               // entry_sp
+  //
+  // To avoid calling convention issues, build a record on the stack
+  // and pass the pointer to that instead.
+  __ push(rbp);               // entry_sp (with extra align space)
   __ push(rbx);               // pusha saved_regs
   __ push(rcx);               // mh
-  __ push(rcx);               // adaptername
+  __ push(rcx);               // slot for adaptername
   __ movptr(Address(rsp, 0), (intptr_t) adaptername);
   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub_wrapper), rsp);
-  __ leave();
+  __ increment(rsp, sizeof(MethodHandleStubArguments));
+
+  if  (UseSSE >= 2) {
+    __ movdbl(xmm0, Address(rsp, 0));
+  } else if (UseSSE == 1) {
+    __ movflt(xmm0, Address(rsp, 0));
+  } else {
+    __ fld_d(Address(rsp, 0));
+  }
+  __ increment(rsp, 2 * wordSize);
+
   __ popa();
-  __ pop(rax);
+  __ leave();
   BLOCK_COMMENT("} trace_method_handle");
 }
 #endif //PRODUCT
@@ -1104,7 +1173,7 @@
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
           //OP_COLLECT_ARGS is below...
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS)
-         |(!UseRicochetFrames ? 0 :
+         |(
            java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() <= 0 ? 0 :
            ((1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF)
            |(1<<java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS)
@@ -1160,7 +1229,7 @@
   Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
   Address vmarg;                // __ argument_address(vmargslot)
 
-  const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+  const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
 
   if (have_entry(ek)) {
     __ nop();                   // empty stubs make SG sick
@@ -2267,23 +2336,19 @@
 
       // grab another temp
       Register rsi_temp = rsi;
-      { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
-      // (preceding push must be done after argslot address is taken!)
-#define UNPUSH_RSI \
-      { if (rsi_temp == saved_last_sp)  __ pop(saved_last_sp); }
 
       // arx_argslot points both to the array and to the first output arg
       vmarg = Address(rax_argslot, 0);
 
       // Get the array value.
-      Register  rsi_array       = rsi_temp;
+      Register  rdi_array       = rdi_temp;
       Register  rdx_array_klass = rdx_temp;
       BasicType elem_type = ek_adapter_opt_spread_type(ek);
       int       elem_slots = type2size[elem_type];  // 1 or 2
       int       array_slots = 1;  // array is always a T_OBJECT
       int       length_offset   = arrayOopDesc::length_offset_in_bytes();
       int       elem0_offset    = arrayOopDesc::base_offset_in_bytes(elem_type);
-      __ movptr(rsi_array, vmarg);
+      __ movptr(rdi_array, vmarg);
 
       Label L_array_is_empty, L_insert_arg_space, L_copy_args, L_args_done;
       if (length_can_be_zero) {
@@ -2294,12 +2359,30 @@
           __ testl(rbx_temp, rbx_temp);
           __ jcc(Assembler::notZero, L_skip);
         }
-        __ testptr(rsi_array, rsi_array);
-        __ jcc(Assembler::zero, L_array_is_empty);
+        __ testptr(rdi_array, rdi_array);
+        __ jcc(Assembler::notZero, L_skip);
+
+        // If 'rsi' contains the 'saved_last_sp' (this is only the
+        // case in a 32-bit version of the VM) we have to save 'rsi'
+        // on the stack because later on (at 'L_array_is_empty') 'rsi'
+        // will be overwritten.
+        { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
+        // Also prepare a handy macro which restores 'rsi' if required.
+#define UNPUSH_RSI                                                      \
+        { if (rsi_temp == saved_last_sp)  __ pop(saved_last_sp); }
+
+        __ jmp(L_array_is_empty);
         __ bind(L_skip);
       }
-      __ null_check(rsi_array, oopDesc::klass_offset_in_bytes());
-      __ load_klass(rdx_array_klass, rsi_array);
+      __ null_check(rdi_array, oopDesc::klass_offset_in_bytes());
+      __ load_klass(rdx_array_klass, rdi_array);
+
+      // Save 'rsi' if required (see comment above).  Do this only
+      // after the null check such that the exception handler which is
+      // called in the case of a null pointer exception will not be
+      // confused by the extra value on the stack (it expects the
+      // return pointer on top of the stack)
+      { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
 
       // Check the array type.
       Register rbx_klass = rbx_temp;
@@ -2307,18 +2390,18 @@
       load_klass_from_Class(_masm, rbx_klass);
 
       Label ok_array_klass, bad_array_klass, bad_array_length;
-      __ check_klass_subtype(rdx_array_klass, rbx_klass, rdi_temp, ok_array_klass);
+      __ check_klass_subtype(rdx_array_klass, rbx_klass, rsi_temp, ok_array_klass);
       // If we get here, the type check failed!
       __ jmp(bad_array_klass);
       __ BIND(ok_array_klass);
 
       // Check length.
       if (length_constant >= 0) {
-        __ cmpl(Address(rsi_array, length_offset), length_constant);
+        __ cmpl(Address(rdi_array, length_offset), length_constant);
       } else {
         Register rbx_vminfo = rbx_temp;
         load_conversion_vminfo(_masm, rbx_vminfo, rcx_amh_conversion);
-        __ cmpl(rbx_vminfo, Address(rsi_array, length_offset));
+        __ cmpl(rbx_vminfo, Address(rdi_array, length_offset));
       }
       __ jcc(Assembler::notEqual, bad_array_length);
 
@@ -2330,9 +2413,9 @@
         __ lea(rdx_argslot_limit, Address(rax_argslot, Interpreter::stackElementSize));
         // 'stack_move' is negative number of words to insert
         // This number already accounts for elem_slots.
-        Register rdi_stack_move = rdi_temp;
-        load_stack_move(_masm, rdi_stack_move, rcx_recv, true);
-        __ cmpptr(rdi_stack_move, 0);
+        Register rsi_stack_move = rsi_temp;
+        load_stack_move(_masm, rsi_stack_move, rcx_recv, true);
+        __ cmpptr(rsi_stack_move, 0);
         assert(stack_move_unit() < 0, "else change this comparison");
         __ jcc(Assembler::less, L_insert_arg_space);
         __ jcc(Assembler::equal, L_copy_args);
@@ -2343,12 +2426,12 @@
         __ jmp(L_args_done);  // no spreading to do
         __ BIND(L_insert_arg_space);
         // come here in the usual case, stack_move < 0 (2 or more spread arguments)
-        Register rsi_temp = rsi_array;  // spill this
-        insert_arg_slots(_masm, rdi_stack_move,
-                         rax_argslot, rbx_temp, rsi_temp);
+        Register rdi_temp = rdi_array;  // spill this
+        insert_arg_slots(_masm, rsi_stack_move,
+                         rax_argslot, rbx_temp, rdi_temp);
         // reload the array since rsi was killed
         // reload from rdx_argslot_limit since rax_argslot is now decremented
-        __ movptr(rsi_array, Address(rdx_argslot_limit, -Interpreter::stackElementSize));
+        __ movptr(rdi_array, Address(rdx_argslot_limit, -Interpreter::stackElementSize));
       } else if (length_constant >= 1) {
         int new_slots = (length_constant * elem_slots) - array_slots;
         insert_arg_slots(_masm, new_slots * stack_move_unit(),
@@ -2371,16 +2454,16 @@
       if (length_constant == -1) {
         // [rax_argslot, rdx_argslot_limit) is the area we are inserting into.
         // Array element [0] goes at rdx_argslot_limit[-wordSize].
-        Register rsi_source = rsi_array;
-        __ lea(rsi_source, Address(rsi_array, elem0_offset));
+        Register rdi_source = rdi_array;
+        __ lea(rdi_source, Address(rdi_array, elem0_offset));
         Register rdx_fill_ptr = rdx_argslot_limit;
         Label loop;
         __ BIND(loop);
         __ addptr(rdx_fill_ptr, -Interpreter::stackElementSize * elem_slots);
         move_typed_arg(_masm, elem_type, true,
-                       Address(rdx_fill_ptr, 0), Address(rsi_source, 0),
-                       rbx_temp, rdi_temp);
-        __ addptr(rsi_source, type2aelembytes(elem_type));
+                       Address(rdx_fill_ptr, 0), Address(rdi_source, 0),
+                       rbx_temp, rsi_temp);
+        __ addptr(rdi_source, type2aelembytes(elem_type));
         __ cmpptr(rdx_fill_ptr, rax_argslot);
         __ jcc(Assembler::above, loop);
       } else if (length_constant == 0) {
@@ -2391,8 +2474,8 @@
         for (int index = 0; index < length_constant; index++) {
           slot_offset -= Interpreter::stackElementSize * elem_slots;  // fill backward
           move_typed_arg(_masm, elem_type, true,
-                         Address(rax_argslot, slot_offset), Address(rsi_array, elem_offset),
-                         rbx_temp, rdi_temp);
+                         Address(rax_argslot, slot_offset), Address(rdi_array, elem_offset),
+                         rbx_temp, rsi_temp);
           elem_offset += type2aelembytes(elem_type);
         }
       }
--- a/src/cpu/x86/vm/methodHandles_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/methodHandles_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -224,6 +224,8 @@
   }
 
   static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
+
+  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
 };
 
 // Additional helper methods for MethodHandles code generation:
--- a/src/cpu/x86/vm/nativeInst_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/nativeInst_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -237,9 +237,21 @@
   int off = 0;
   u_char instr_0 = ubyte_at(off);
 
+  // See comment in Assembler::locate_operand() about VEX prefixes.
+  if (instr_0 == instruction_VEX_prefix_2bytes) {
+    assert((UseAVX > 0), "shouldn't have VEX prefix");
+    NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
+    return 2;
+  }
+  if (instr_0 == instruction_VEX_prefix_3bytes) {
+    assert((UseAVX > 0), "shouldn't have VEX prefix");
+    NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
+    return 3;
+  }
+
   // First check to see if we have a (prefixed or not) xor
-  if ( instr_0 >= instruction_prefix_wide_lo &&      // 0x40
-       instr_0 <= instruction_prefix_wide_hi) { // 0x4f
+  if (instr_0 >= instruction_prefix_wide_lo && // 0x40
+      instr_0 <= instruction_prefix_wide_hi) { // 0x4f
     off++;
     instr_0 = ubyte_at(off);
   }
@@ -256,13 +268,13 @@
     instr_0 = ubyte_at(off);
   }
 
-  if ( instr_0 == instruction_code_xmm_ss_prefix ||      // 0xf3
+  if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3
        instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2
     off++;
     instr_0 = ubyte_at(off);
   }
 
-  if ( instr_0 >= instruction_prefix_wide_lo &&      // 0x40
+  if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
        instr_0 <= instruction_prefix_wide_hi) { // 0x4f
     off++;
     instr_0 = ubyte_at(off);
--- a/src/cpu/x86/vm/nativeInst_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/nativeInst_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -287,6 +287,9 @@
     instruction_code_xmm_store          = 0x11,
     instruction_code_xmm_lpd            = 0x12,
 
+    instruction_VEX_prefix_2bytes       = Assembler::VEX_2bytes,
+    instruction_VEX_prefix_3bytes       = Assembler::VEX_3bytes,
+
     instruction_size                    = 4,
     instruction_offset                  = 0,
     data_offset                         = 2,
--- a/src/cpu/x86/vm/register_definitions_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/register_definitions_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -53,6 +53,7 @@
 REGISTER_DEFINITION(Register, r15);
 #endif // AMD64
 
+REGISTER_DEFINITION(XMMRegister, xnoreg);
 REGISTER_DEFINITION(XMMRegister, xmm0 );
 REGISTER_DEFINITION(XMMRegister, xmm1 );
 REGISTER_DEFINITION(XMMRegister, xmm2 );
@@ -115,6 +116,7 @@
 REGISTER_DEFINITION(Register, r15_thread);
 #endif // AMD64
 
+REGISTER_DEFINITION(MMXRegister, mnoreg );
 REGISTER_DEFINITION(MMXRegister, mmx0 );
 REGISTER_DEFINITION(MMXRegister, mmx1 );
 REGISTER_DEFINITION(MMXRegister, mmx2 );
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1091,12 +1091,238 @@
   }
 }
 
+// Emit code that spills (map != NULL) or reloads (map == NULL) the register-passed incoming arguments via the rsp-relative save area starting at slot arg_save_area.
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // A non-NULL map selects the store direction (and receives the oop slots);
+  // a NULL map selects the load direction.
+  int handle_index = 0;
+  // First pass: double-word values (doubles in XMM registers, longs in registers)
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+      } else {
+        __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+      }
+    }
+    if (in_regs[i].first()->is_Register() && in_sig_bt[i] == T_LONG) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movl(Address(rsp, offset), in_regs[i].first()->as_Register());
+        if (in_regs[i].second()->is_Register()) {  // second half is only saved when it is also in a register
+          __ movl(Address(rsp, offset + 4), in_regs[i].second()->as_Register());
+        }
+      } else {
+        __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
+        if (in_regs[i].second()->is_Register()) {
+          __ movl(in_regs[i].second()->as_Register(), Address(rsp, offset + 4));
+        }
+      }
+    }
+  }
+  // Second pass: single-word values (arrays, ints and smaller, floats)
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_Register()) {
+      int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      assert(handle_index <= stack_slots, "overflow");
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        map->set_oop(VMRegImpl::stack2reg(slot));;
+      }
+      // NOTE(review): a T_LONG held in a Register is handled in the first pass but also lands here and hits ShouldNotReachHere() below - confirm longs never arrive in registers on this platform.
+      // The value lives in an input register: store it to, or reload it from, its save slot
+      const Register reg = in_regs[i].first()->as_Register();
+      switch (in_sig_bt[i]) {
+        case T_ARRAY:
+          if (map != NULL) {
+            __ movptr(Address(rsp, offset), reg);
+          } else {
+            __ movptr(reg, Address(rsp, offset));
+          }
+          break;
+        case T_BOOLEAN:
+        case T_CHAR:
+        case T_BYTE:
+        case T_SHORT:
+        case T_INT:
+          if (map != NULL) {
+            __ movl(Address(rsp, offset), reg);
+          } else {
+            __ movl(reg, Address(rsp, offset));
+          }
+          break;
+        case T_OBJECT:
+        default: ShouldNotReachHere();
+      }
+    } else if (in_regs[i].first()->is_XMMRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+        int offset = slot * VMRegImpl::stack_slot_size;
+        assert(handle_index <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+        } else {
+          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+        }
+      }
+    } else if (in_regs[i].first()->is_stack()) {  // stack-passed values are not moved; arrays only get their slot recorded in the map
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+      }
+    }
+  }
+}
+
+// Check GC_locker::needs_gc and, if it is set, call
+// SharedRuntime::block_for_jni_critical so that a new JNI critical region
+// does not start until a GC has been forced.  Register arguments are spilled
+// around that call and any array oops among them are described in an OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               Register thread,
+                                               int stack_slots,
+                                               int total_c_args,
+                                               int total_in_args,
+                                               int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) {
+  __ block_comment("check GC_locker::needs_gc");
+  Label cont;
+  __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
+  __ jcc(Assembler::equal, cont);  // flag clear: skip the runtime call entirely
+
+  // Save down any incoming oops and call into the runtime to halt for a GC
+
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+  // Register the oop map at the current code offset and hand the matching pc to set_last_Java_frame
+  address the_pc = __ pc();
+  oop_maps->add_gc_map( __ offset(), map);
+  __ set_last_Java_frame(thread, rsp, noreg, the_pc);
+
+  __ block_comment("block_for_jni_critical");
+  __ push(thread);
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
+  __ increment(rsp, wordSize);  // pop the thread argument pushed above
+
+  __ get_thread(thread);
+  __ reset_last_Java_frame(thread, false, true);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);  // NULL map: reload the spilled arguments
+
+  __ bind(cont);
+#ifdef ASSERT
+  if (StressCriticalJNINatives) {
+    // Stress register saving: spill, clobber and reload on every call
+    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+    // Zero the argument registers (NOTE(review): the bound total_in_args - 1 leaves the last entry untouched - confirm intended)
+    for (int i = 0; i < total_in_args - 1; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        __ xorptr(reg, reg);
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      } else if (in_regs[i].first()->is_stack()) {
+        // Nothing to do
+      } else {
+        ShouldNotReachHere();
+      }
+      if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
+        i++;  // skip the entry after a long/double (presumably its T_VOID half - confirm)
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+#endif
+}
+
+// Unpack an array argument into two outgoing arguments: a pointer to the array
+// body (body_arg) and the length (length_arg); a null array passes 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
+  Register tmp_reg = rax;  // scratch register; neither destination may be rax (asserted below)
+  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+
+  // Pass the length, ptr pair
+  Label is_null, done;
+  VMRegPair tmp(tmp_reg->as_VMReg());
+  if (reg.first()->is_stack()) {
+    // The array reference is on the stack: load it into the scratch register first
+    simple_move32(masm, reg, tmp);
+    reg = tmp;
+  }
+  __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
+  __ jccb(Assembler::equal, is_null);
+  __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));  // tmp_reg = array reference + base offset
+  simple_move32(masm, tmp, body_arg);
+  // tmp_reg now holds the body address, so the length is read at (length offset - base offset) from it.
+  __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
+                           arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  simple_move32(masm, tmp, length_arg);
+  __ jmpb(done);
+  __ bind(is_null);
+  // Null array: pass zero for both the body pointer and the length
+  __ xorptr(tmp_reg, tmp_reg);
+  simple_move32(masm, tmp, body_arg);
+  simple_move32(masm, tmp, length_arg);
+  __ bind(done);
+}
+
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
 // convention (handlizes oops, etc), transitions to native, makes the call,
 // returns to java state (possibly blocking), unhandlizes any result and
 // returns.
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions.  The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GC_locker
+// lock_critical/unlock_critical semantics are followed.  Some other
+// parts of JNI setup are skipped, like the tear down of the JNI handle
+// block and the check for pending exceptions, since it's impossible
+// for them to be thrown.
+//
+// They are roughly structured like this:
+//    if (GC_locker::needs_gc())
+//      SharedRuntime::block_for_jni_critical();
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
+//    check for safepoint in progress
+//    check if any thread suspend flags are set
+//      call into JVM and possibly unlock the JNI critical
+//      if a GC was suppressed while in the critical native.
+//    transition back to thread_in_Java
+//    return to caller
+//
 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                 methodHandle method,
                                                 int compile_id,
@@ -1105,6 +1331,13 @@
                                                 BasicType *in_sig_bt,
                                                 VMRegPair *in_regs,
                                                 BasicType ret_type) {
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
 
   // An OopMap for lock (and class if static)
   OopMapSet *oop_maps = new OopMapSet();
@@ -1115,30 +1348,72 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
-  int total_c_args = total_in_args + 1;
-  if (method->is_static()) {
-    total_c_args++;
+  int total_c_args = total_in_args;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        total_c_args++;
+      }
+    }
   }
 
   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
 
   int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
   }
 
-  int i;
-  for (i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
-  }
-
-
   // Now figure out where the args must be stored and how much stack space
-  // they require (neglecting out_preserve_stack_slots but space for storing
-  // the 1st six register arguments). It's weird see int_stk_helper.
-  //
+  // they require.
   int out_arg_slots;
   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
 
@@ -1151,9 +1426,44 @@
   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 
   // Now the space for the inbound oop handle area
+  int total_save_slots = 2 * VMRegImpl::slots_per_word; // 2 arguments passed in registers
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_ARRAY:
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  single_slots++; break;
+          case T_LONG: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        switch (in_sig_bt[i]) {
+          case T_FLOAT:  single_slots++; break;
+          case T_DOUBLE: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+    // align the save area
+    if (double_slots != 0) {
+      stack_slots = round_to(stack_slots, 2);
+    }
+  }
 
   int oop_handle_offset = stack_slots;
-  stack_slots += 2*VMRegImpl::slots_per_word;
+  stack_slots += total_save_slots;
 
   // Now any space we need for handlizing a klass if static method
 
@@ -1161,7 +1471,6 @@
   int klass_offset = -1;
   int lock_slot_offset = 0;
   bool is_static = false;
-  int oop_temp_slot_offset = 0;
 
   if (method->is_static()) {
     klass_slot_offset = stack_slots;
@@ -1221,7 +1530,7 @@
   // First thing make an ic check to see if we should even be here
 
   // We are free to use all registers as temps without saving them and
-  // restoring them except rbp,. rbp, is the only callee save register
+  // restoring them except rbp. rbp is the only callee save register
   // as far as the interpreter and the compiler(s) are concerned.
 
 
@@ -1230,7 +1539,6 @@
   Label hit;
   Label exception_pending;
 
-
   __ verify_oop(receiver);
   __ cmpptr(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
   __ jcc(Assembler::equal, hit);
@@ -1292,11 +1600,10 @@
 
   // Generate a new frame for the wrapper.
   __ enter();
-  // -2 because return address is already present and so is saved rbp,
+  // -2 because return address is already present and so is saved rbp
   __ subptr(rsp, stack_size - 2*wordSize);
 
-  // Frame is now completed as far a size and linkage.
-
+  // Frame is now completed as far as size and linkage.
   int frame_complete = ((intptr_t)__ pc()) - start;
 
   // Calculate the difference between rsp and rbp,. We need to know it
@@ -1319,7 +1626,6 @@
   // Compute the rbp, offset for any slots used after the jni call
 
   int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
-  int oop_temp_slot_rbp_offset = (oop_temp_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
 
   // We use rdi as a thread pointer because it is callee save and
   // if we load it once it is usable thru the entire wrapper
@@ -1332,6 +1638,10 @@
 
   __ get_thread(thread);
 
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
 
   //
   // We immediately shuffle the arguments so that any vm call we have to
@@ -1353,7 +1663,7 @@
   // vectors we have in our possession. We simply walk the java vector to
   // get the source locations and the c vector to get the destinations.
 
-  int c_arg = method->is_static() ? 2 : 1 ;
+  int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );
 
   // Record rsp-based slot for receiver on stack for non-static methods
   int receiver_offset = -1;
@@ -1373,10 +1683,16 @@
   // Are free to temporaries if we have to do  stack to steck moves.
   // All inbound args are referenced based on rbp, and all outbound args via rsp.
 
-  for (i = 0; i < total_in_args ; i++, c_arg++ ) {
+  for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
     switch (in_sig_bt[i]) {
       case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+          c_arg++;
+          break;
+        }
       case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                     ((i == 0) && (!is_static)),
                     &receiver_offset);
@@ -1408,7 +1724,7 @@
 
   // Pre-load a static method's oop into rsi.  Used both by locking code and
   // the normal JNI call code.
-  if (method->is_static()) {
+  if (method->is_static() && !is_critical_native) {
 
     //  load opp into a register
     __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
@@ -1463,6 +1779,7 @@
 
   // Lock a synchronized method
   if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
 
 
     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
@@ -1529,14 +1846,15 @@
 
 
   // get JNIEnv* which is first argument to native
-
-  __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
-  __ movptr(Address(rsp, 0), rdx);
+  if (!is_critical_native) {
+    __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
+    __ movptr(Address(rsp, 0), rdx);
+  }
 
   // Now set thread in native
   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native);
 
-  __ call(RuntimeAddress(method->native_function()));
+  __ call(RuntimeAddress(native_func));
 
   // WARNING - on Windows Java Natives use pascal calling convention and pop the
   // arguments off of the stack. We could just re-adjust the stack pointer here
@@ -1591,6 +1909,8 @@
     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
   }
 
+  Label after_transition;
+
   // check for safepoint operation in progress and/or pending suspend requests
   { Label Continue;
 
@@ -1611,17 +1931,29 @@
     //
     save_native_result(masm, ret_type, stack_slots);
     __ push(thread);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
-                                            JavaThread::check_special_condition_for_native_trans)));
+    if (!is_critical_native) {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                              JavaThread::check_special_condition_for_native_trans)));
+    } else {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                              JavaThread::check_special_condition_for_native_trans_and_transition)));
+    }
     __ increment(rsp, wordSize);
     // Restore any method result value
     restore_native_result(masm, ret_type, stack_slots);
 
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ jmpb(after_transition);
+    }
+
     __ bind(Continue);
   }
 
   // change thread state
   __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);
+  __ bind(after_transition);
 
   Label reguard;
   Label reguard_done;
@@ -1710,15 +2042,15 @@
       __ verify_oop(rax);
   }
 
-  // reset handle block
-  __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
-
-  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
-
-  // Any exception pending?
-  __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
-  __ jcc(Assembler::notEqual, exception_pending);
-
+  if (!is_critical_native) {
+    // reset handle block
+    __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
+    __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
+
+    // Any exception pending?
+    __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
+    __ jcc(Assembler::notEqual, exception_pending);
+  }
 
   // no exception, we're almost done
 
@@ -1829,16 +2161,18 @@
 
   // BEGIN EXCEPTION PROCESSING
 
-  // Forward  the exception
-  __ bind(exception_pending);
-
-  // remove possible return value from FPU register stack
-  __ empty_FPU_stack();
-
-  // pop our frame
-  __ leave();
-  // and forward the exception
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  if (!is_critical_native) {
+    // Forward  the exception
+    __ bind(exception_pending);
+
+    // remove possible return value from FPU register stack
+    __ empty_FPU_stack();
+
+    // pop our frame
+    __ leave();
+    // and forward the exception
+    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  }
 
   __ flush();
 
@@ -1851,6 +2185,11 @@
                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
                                             oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
+
   return nm;
 
 }
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -939,6 +939,25 @@
   }
 }
 
+static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {  // emit a movq from src.first() to dst.first(), each either a register or a stack slot
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack: go through rax (source is addressed off rbp, destination off rsp)
+      __ movq(rax, Address(rbp, reg2offset_in(src.first())));
+      __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
+    } else {
+      // stack to reg
+      __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
+  } else {  // reg to reg; nothing is emitted when source and destination are the same
+    if (dst.first() != src.first()) {
+      __ movq(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
 
 // An oop arg. Must pass a handle not the oop itself
 static void object_move(MacroAssembler* masm,
@@ -1153,6 +1172,203 @@
     }
 }
 
+// Emit code that spills (map != NULL) or reloads (map == NULL) the register-passed incoming arguments via the rsp-relative save area starting at slot arg_save_area.
+static void save_or_restore_arguments(MacroAssembler* masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap* map,
+                                      VMRegPair* in_regs,
+                                      BasicType* in_sig_bt) {
+  // A non-NULL map selects the store direction (and receives the oop slots);
+  // a NULL map selects the load direction.
+  int handle_index = 0;
+  // First pass: 64-bit values (doubles in XMM registers; longs and array oops in registers)
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+      } else {
+        __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+      }
+    }
+    if (in_regs[i].first()->is_Register() &&
+        (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
+      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      handle_index += 2;
+      assert(handle_index <= stack_slots, "overflow");
+      if (map != NULL) {
+        __ movq(Address(rsp, offset), in_regs[i].first()->as_Register());
+        if (in_sig_bt[i] == T_ARRAY) {  // the saved array reference is an oop: record its slot in the map
+          map->set_oop(VMRegImpl::stack2reg(slot));;
+        }
+      } else {
+        __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset));
+      }
+    }
+  }
+  // Second pass: 32-bit values (ints and smaller in registers, floats in XMM registers)
+  for ( int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_Register()) {
+      int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+      int offset = slot * VMRegImpl::stack_slot_size;
+      assert(handle_index <= stack_slots, "overflow");
+      // NOTE(review): handle_index is also consumed here for T_ARRAY/T_LONG entries already saved in the first pass, leaving that slot unused - verify the save area is sized for this.
+      // The value lives in an input register: store it to, or reload it from, its save slot
+      const Register reg = in_regs[i].first()->as_Register();
+      switch (in_sig_bt[i]) {
+        case T_BOOLEAN:
+        case T_CHAR:
+        case T_BYTE:
+        case T_SHORT:
+        case T_INT:
+          if (map != NULL) {
+            __ movl(Address(rsp, offset), reg);
+          } else {
+            __ movl(reg, Address(rsp, offset));
+          }
+          break;
+        case T_ARRAY:
+        case T_LONG:
+          // already saved/restored by the first pass
+          break;
+        case T_OBJECT:
+        default: ShouldNotReachHere();
+      }
+    } else if (in_regs[i].first()->is_XMMRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
+        int offset = slot * VMRegImpl::stack_slot_size;
+        assert(handle_index <= stack_slots, "overflow");
+        if (map != NULL) {
+          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
+        } else {
+          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
+        }
+      }
+    } else if (in_regs[i].first()->is_stack()) {  // stack-passed values are not moved; arrays only get their slot recorded in the map
+      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
+        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+      }
+    }
+  }
+}
+
+
+// Check GC_locker::needs_gc and, if it is set, call
+// SharedRuntime::block_for_jni_critical so that a new JNI critical region
+// does not start until a GC has been forced.  Register arguments are spilled
+// around that call and any array oops among them are described in an OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
+                                               int stack_slots,
+                                               int total_c_args,
+                                               int total_in_args,
+                                               int arg_save_area,
+                                               OopMapSet* oop_maps,
+                                               VMRegPair* in_regs,
+                                               BasicType* in_sig_bt) {
+  __ block_comment("check GC_locker::needs_gc");
+  Label cont;
+  __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
+  __ jcc(Assembler::equal, cont);  // flag clear: skip the runtime call entirely
+
+  // Save down any incoming oops and call into the runtime to halt for a GC
+
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, map, in_regs, in_sig_bt);
+  // Register the oop map at the current code offset and hand the matching pc to set_last_Java_frame
+  address the_pc = __ pc();
+  oop_maps->add_gc_map( __ offset(), map);
+  __ set_last_Java_frame(rsp, noreg, the_pc);
+
+  __ block_comment("block_for_jni_critical");
+  __ movptr(c_rarg0, r15_thread);
+  __ mov(r12, rsp); // remember sp in r12 so it can be restored after the call
+  __ subptr(rsp, frame::arg_reg_save_area_bytes); // reserve the register-argument save area (windows)
+  __ andptr(rsp, -16); // align stack as required by ABI
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
+  __ mov(rsp, r12); // restore sp
+  __ reinit_heapbase();
+
+  __ reset_last_Java_frame(false, true);
+
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);  // NULL map: reload the spilled arguments
+
+  __ bind(cont);
+#ifdef ASSERT
+  if (StressCriticalJNINatives) {
+    // Stress register saving: spill, clobber and reload on every call
+    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, map, in_regs, in_sig_bt);
+    // Zero the argument registers (NOTE(review): the bound total_in_args - 1 leaves the last entry untouched - confirm intended)
+    for (int i = 0; i < total_in_args - 1; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        __ xorptr(reg, reg);
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      } else if (in_regs[i].first()->is_stack()) {
+        // Nothing to do
+      } else {
+        ShouldNotReachHere();
+      }
+      if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
+        i++;  // skip the entry after a long/double (presumably its T_VOID half - confirm)
+      }
+    }
+
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+#endif
+}
+
+// Unpack an array argument into two outgoing arguments: a pointer to the array
+// body (body_arg) and the length (length_arg); a null array passes 0 for both.
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
+  Register tmp_reg = rax;  // scratch register; neither destination may be rax (asserted below)
+  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+
+  // Pass the length, ptr pair
+  Label is_null, done;
+  VMRegPair tmp;
+  tmp.set_ptr(tmp_reg->as_VMReg());
+  if (reg.first()->is_stack()) {
+    // The array reference is on the stack: load it into the scratch register first
+    move_ptr(masm, reg, tmp);
+    reg = tmp;
+  }
+  __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
+  __ jccb(Assembler::equal, is_null);
+  __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));  // tmp_reg = array reference + base offset
+  move_ptr(masm, tmp, body_arg);
+  // tmp_reg now holds the body address, so the length is read at (length offset - base offset) from it.
+  __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
+                           arrayOopDesc::base_offset_in_bytes(in_elem_type)));
+  move32_64(masm, tmp, length_arg);
+  __ jmpb(done);
+  __ bind(is_null);
+  // Null array: pass zero for both the body pointer and the length
+  __ xorptr(tmp_reg, tmp_reg);
+  move_ptr(masm, tmp, body_arg);
+  move32_64(masm, tmp, length_arg);
+  __ bind(done);
+}
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
@@ -1167,10 +1383,14 @@
                                                 BasicType *in_sig_bt,
                                                 VMRegPair *in_regs,
                                                 BasicType ret_type) {
-  // Native nmethod wrappers never take possesion of the oop arguments.
-  // So the caller will gc the arguments. The only thing we need an
-  // oopMap for is if the call is static
-  //
+  bool is_critical_native = true;
+  address native_func = method->critical_native_function();
+  if (native_func == NULL) {
+    native_func = method->native_function();
+    is_critical_native = false;
+  }
+  assert(native_func != NULL, "must have function");
+
   // An OopMap for lock (and class if static)
   OopMapSet *oop_maps = new OopMapSet();
   intptr_t start = (intptr_t)__ pc();
@@ -1181,27 +1401,72 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
-  int total_c_args = total_in_args + 1;
-  if (method->is_static()) {
-    total_c_args++;
+  int total_c_args = total_in_args;
+  if (!is_critical_native) {
+    total_c_args += 1;
+    if (method->is_static()) {
+      total_c_args++;
+    }
+  } else {
+    for (int i = 0; i < total_in_args; i++) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        total_c_args++;
+      }
+    }
   }
 
   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
 
   int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
-  }
-
-  for (int i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
+  if (!is_critical_native) {
+    out_sig_bt[argc++] = T_ADDRESS;
+    if (method->is_static()) {
+      out_sig_bt[argc++] = T_OBJECT;
+    }
+
+    for (int i = 0; i < total_in_args ; i++ ) {
+      out_sig_bt[argc++] = in_sig_bt[i];
+    }
+  } else {
+    Thread* THREAD = Thread::current();
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
+    SignatureStream ss(method->signature());
+    for (int i = 0; i < total_in_args ; i++ ) {
+      if (in_sig_bt[i] == T_ARRAY) {
+        // Arrays are passed as int, elem* pair
+        out_sig_bt[argc++] = T_INT;
+        out_sig_bt[argc++] = T_ADDRESS;
+        Symbol* atype = ss.as_symbol(CHECK_NULL);
+        const char* at = atype->as_C_string();
+        if (strlen(at) == 2) {
+          assert(at[0] == '[', "must be");
+          switch (at[1]) {
+            case 'B': in_elem_bt[i]  = T_BYTE; break;
+            case 'C': in_elem_bt[i]  = T_CHAR; break;
+            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
+            case 'F': in_elem_bt[i]  = T_FLOAT; break;
+            case 'I': in_elem_bt[i]  = T_INT; break;
+            case 'J': in_elem_bt[i]  = T_LONG; break;
+            case 'S': in_elem_bt[i]  = T_SHORT; break;
+            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
+            default: ShouldNotReachHere();
+          }
+        }
+      } else {
+        out_sig_bt[argc++] = in_sig_bt[i];
+        in_elem_bt[i] = T_VOID;
+      }
+      if (in_sig_bt[i] != T_VOID) {
+        assert(in_sig_bt[i] == ss.type(), "must match");
+        ss.next();
+      }
+    }
   }
 
   // Now figure out where the args must be stored and how much stack space
   // they require.
-  //
   int out_arg_slots;
   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
 
@@ -1214,13 +1479,47 @@
   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 
   // Now the space for the inbound oop handle area
+  int total_save_slots = 6 * VMRegImpl::slots_per_word;  // 6 arguments passed in registers
+  if (is_critical_native) {
+    // Critical natives may have to call out so they need a save area
+    // for register arguments.
+    int double_slots = 0;
+    int single_slots = 0;
+    for ( int i = 0; i < total_in_args; i++) {
+      if (in_regs[i].first()->is_Register()) {
+        const Register reg = in_regs[i].first()->as_Register();
+        switch (in_sig_bt[i]) {
+          case T_ARRAY:
+          case T_BOOLEAN:
+          case T_BYTE:
+          case T_SHORT:
+          case T_CHAR:
+          case T_INT:  single_slots++; break;
+          case T_LONG: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_XMMRegister()) {
+        switch (in_sig_bt[i]) {
+          case T_FLOAT:  single_slots++; break;
+          case T_DOUBLE: double_slots++; break;
+          default:  ShouldNotReachHere();
+        }
+      } else if (in_regs[i].first()->is_FloatRegister()) {
+        ShouldNotReachHere();
+      }
+    }
+    total_save_slots = double_slots * 2 + single_slots;
+    // align the save area
+    if (double_slots != 0) {
+      stack_slots = round_to(stack_slots, 2);
+    }
+  }
 
   int oop_handle_offset = stack_slots;
-  stack_slots += 6*VMRegImpl::slots_per_word;
+  stack_slots += total_save_slots;
 
   // Now any space we need for handlizing a klass if static method
 
-  int oop_temp_slot_offset = 0;
   int klass_slot_offset = 0;
   int klass_offset = -1;
   int lock_slot_offset = 0;
@@ -1273,7 +1572,6 @@
 
   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 
-
   // First thing make an ic check to see if we should even be here
 
   // We are free to use all registers as temps without saving them and
@@ -1284,22 +1582,22 @@
   const Register ic_reg = rax;
   const Register receiver = j_rarg0;
 
-  Label ok;
+  Label hit;
   Label exception_pending;
 
   assert_different_registers(ic_reg, receiver, rscratch1);
   __ verify_oop(receiver);
   __ load_klass(rscratch1, receiver);
   __ cmpq(ic_reg, rscratch1);
-  __ jcc(Assembler::equal, ok);
+  __ jcc(Assembler::equal, hit);
 
   __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
-  __ bind(ok);
-
   // Verified entry point must be aligned
   __ align(8);
 
+  __ bind(hit);
+
   int vep_offset = ((intptr_t)__ pc()) - start;
 
   // The instruction at the verified entry point must be 5 bytes or longer
@@ -1320,9 +1618,8 @@
   // -2 because return address is already present and so is saved rbp
   __ subptr(rsp, stack_size - 2*wordSize);
 
-    // Frame is now completed as far as size and linkage.
-
-    int frame_complete = ((intptr_t)__ pc()) - start;
+  // Frame is now completed as far as size and linkage.
+  int frame_complete = ((intptr_t)__ pc()) - start;
 
 #ifdef ASSERT
     {
@@ -1342,7 +1639,10 @@
 
   const Register oop_handle_reg = r14;
 
-
+  if (is_critical_native) {
+    check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
+                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
+  }
 
   //
   // We immediately shuffle the arguments so that any vm call we have to
@@ -1391,9 +1691,36 @@
 
 #endif /* ASSERT */
 
-
+  if (is_critical_native) {
+    // The mapping of Java and C arguments passed in registers are
+    // rotated by one, which helps when passing arguments to regular
+    // Java method but for critical natives that creates a cycle which
+    // can cause arguments to be killed before they are used.  Break
+    // the cycle by moving the first argument into a temporary
+    // register.
+    for (int i = 0; i < total_c_args; i++) {
+      if (in_regs[i].first()->is_Register() &&
+          in_regs[i].first()->as_Register() == rdi) {
+        __ mov(rbx, rdi);
+        in_regs[i].set1(rbx->as_VMReg());
+      }
+    }
+  }
+
+  // This may iterate in two different directions depending on the
+  // kind of native it is.  The reason is that for regular JNI natives
+  // the incoming and outgoing registers are offset upwards and for
+  // critical natives they are offset down.
   int c_arg = total_c_args - 1;
-  for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
+  int stride = -1;
+  int init = total_in_args - 1;
+  if (is_critical_native) {
+    // stride forwards
+    c_arg = 0;
+    stride = 1;
+    init = 0;
+  }
+  for (int i = init, count = 0; count < total_in_args; i += stride, c_arg += stride, count++ ) {
 #ifdef ASSERT
     if (in_regs[i].first()->is_Register()) {
       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
@@ -1408,7 +1735,20 @@
 #endif /* ASSERT */
     switch (in_sig_bt[i]) {
       case T_ARRAY:
+        if (is_critical_native) {
+          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
+          c_arg++;
+#ifdef ASSERT
+          if (out_regs[c_arg].first()->is_Register()) {
+            reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+          } else if (out_regs[c_arg].first()->is_XMMRegister()) {
+            freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
+          }
+#endif
+          break;
+        }
       case T_OBJECT:
+        assert(!is_critical_native, "no oop arguments");
         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                     ((i == 0) && (!is_static)),
                     &receiver_offset);
@@ -1444,7 +1784,7 @@
 
   // Pre-load a static method's oop into r14.  Used both by locking code and
   // the normal JNI call code.
-  if (method->is_static()) {
+  if (method->is_static() && !is_critical_native) {
 
     //  load oop into a register
     __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
@@ -1510,6 +1850,7 @@
   Label lock_done;
 
   if (method->is_synchronized()) {
+    assert(!is_critical_native, "unhandled");
 
 
     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
@@ -1573,13 +1914,14 @@
 
 
   // get JNIEnv* which is first argument to native
-
-  __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
+  if (!is_critical_native) {
+    __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
+  }
 
   // Now set thread in native
   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
 
-  __ call(RuntimeAddress(method->native_function()));
+  __ call(RuntimeAddress(native_func));
 
     // Either restore the MXCSR register after returning from the JNI Call
     // or verify that it wasn't changed.
@@ -1635,6 +1977,7 @@
     }
   }
 
+  Label after_transition;
 
   // check for safepoint operation in progress and/or pending suspend requests
   {
@@ -1660,16 +2003,28 @@
     __ mov(r12, rsp); // remember sp
     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
     __ andptr(rsp, -16); // align stack as required by ABI
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+    if (!is_critical_native) {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+    } else {
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
+    }
     __ mov(rsp, r12); // restore sp
     __ reinit_heapbase();
     // Restore any method result value
     restore_native_result(masm, ret_type, stack_slots);
+
+    if (is_critical_native) {
+      // The call above performed the transition to thread_in_Java so
+      // skip the transition logic below.
+      __ jmpb(after_transition);
+    }
+
     __ bind(Continue);
   }
 
   // change thread state
   __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
+  __ bind(after_transition);
 
   Label reguard;
   Label reguard_done;
@@ -1747,17 +2102,21 @@
       __ verify_oop(rax);
   }
 
-  // reset handle block
-  __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
-  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  if (!is_critical_native) {
+    // reset handle block
+    __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
+    __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  }
 
   // pop our frame
 
   __ leave();
 
-  // Any exception pending?
-  __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
-  __ jcc(Assembler::notEqual, exception_pending);
+  if (!is_critical_native) {
+    // Any exception pending?
+    __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
+    __ jcc(Assembler::notEqual, exception_pending);
+  }
 
   // Return
 
@@ -1765,12 +2124,13 @@
 
   // Unexpected paths are out of line and go here
 
-  // forward the exception
-  __ bind(exception_pending);
-
-  // and forward the exception
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
-
+  if (!is_critical_native) {
+    // forward the exception
+    __ bind(exception_pending);
+
+    // and forward the exception
+    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  }
 
   // Slow path locking & unlocking
   if (method->is_synchronized()) {
@@ -1877,6 +2237,11 @@
                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
                                             oop_maps);
+
+  if (is_critical_native) {
+    nm->set_lazy_critical_native(true);
+  }
+
   return nm;
 
 }
@@ -2637,7 +3002,7 @@
   __ bind(no_pending_exception);
 #endif
 
-  // (tw) Start of graal uncommon trap code.
+#ifdef GRAAL
   __ jmp(cont);
 
   int jmp_uncommon_trap_offset = __ pc() - start;
@@ -2646,21 +3011,11 @@
 
   int uncommon_trap_offset = __ pc() - start;
 
-  // Warning: Duplicate code
-
   // Save everything in sight.
   RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
-
-  // Normal deoptimization
-
-
   // fetch_unroll_info needs to call last_java_frame()
   __ set_last_Java_frame(noreg, noreg, NULL);
 
-
-  //  __ movl(c_rarg1, (int32_t)Deoptimization::Unpack_reexecute);
-  //  __ movl(r14, c_rarg1); // save into r14 for later call to unpack_frames
-
   assert(r10 == rscratch1, "scratch register should be r10");
   __ movl(c_rarg1, Address(rsp, RegisterSaver::r10_offset_in_bytes()));
   __ orq(c_rarg1, ~(int32_t)Deoptimization::make_trap_request(Deoptimization::Reason_unreached, Deoptimization::Action_none));
@@ -2668,19 +3023,13 @@
   __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute);
   __ mov(c_rarg0, r15_thread);
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
-
-  // Need to have an oopmap that tells fetch_unroll_info where to
-  // find any register it might need.
-
   oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
 
   __ reset_last_Java_frame(false, false);
 
   Label after_fetch_unroll_info_call;
   __ jmp(after_fetch_unroll_info_call);
-
-
-  // (tw) End of graal uncommon trap code.
+#endif
 
   __ bind(cont);
 
@@ -2702,7 +3051,6 @@
     __ bind(L);
   }
 #endif // ASSERT
-  
   __ mov(c_rarg0, r15_thread);
   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
 
@@ -2712,7 +3060,9 @@
 
   __ reset_last_Java_frame(false, false);
 
+#ifdef GRAAL
   __ bind(after_fetch_unroll_info_call);
+#endif
 
   // Load UnrollBlock* into rdi
   __ mov(rdi, rax);
@@ -2883,8 +3233,10 @@
 
   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+#ifdef GRAAL
   _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
   _deopt_blob->set_jmp_uncommon_trap_offset(jmp_uncommon_trap_offset);
+#endif
 }
 
 #ifdef COMPILER2
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1374,8 +1374,7 @@
     //                                  L_success, L_failure, NULL);
     assert_different_registers(sub_klass, temp);
 
-    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_super_cache_offset_in_bytes());
+    int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
 
     // if the pointers are equal, we are done (e.g., String[] elements)
     __ cmpptr(sub_klass, super_klass_addr);
@@ -1787,8 +1786,7 @@
     //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
     //
 
-    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-                    Klass::layout_helper_offset_in_bytes();
+    int lh_offset = in_bytes(Klass::layout_helper_offset());
     Address src_klass_lh_addr(rcx_src_klass, lh_offset);
 
     // Handle objArrays completely differently...
@@ -1914,10 +1912,8 @@
     // live at this point:  rcx_src_klass, dst[_pos], src[_pos]
     {
       // Handy offsets:
-      int  ek_offset = (klassOopDesc::header_size() * HeapWordSize +
-                        objArrayKlass::element_klass_offset_in_bytes());
-      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                        Klass::super_check_offset_offset_in_bytes());
+      int  ek_offset = in_bytes(objArrayKlass::element_klass_offset());
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
 
       Register rsi_dst_klass = rsi;
       Register rdi_temp      = rdi;
@@ -2190,7 +2186,7 @@
   // either at call sites or otherwise assume that stack unwinding will be initiated,
   // so caller saved registers were assumed volatile in the compiler.
   address generate_throw_exception(const char* name, address runtime_entry,
-                                   bool restore_saved_exception_pc, Register arg1 = noreg, Register arg2 = noreg) {
+                                   Register arg1 = noreg, Register arg2 = noreg) {
 
     int insts_size = 256;
     int locs_size  = 32;
@@ -2322,7 +2318,10 @@
     StubRoutines::_throw_WrongMethodTypeException_entry =
       generate_throw_exception("WrongMethodTypeException throw_exception",
                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
-                               false, rax, rcx);
+                               rax, rcx);
+
+    // Build this early so it's available for the interpreter
+    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
   }
 
 
@@ -2334,7 +2333,6 @@
     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError));
     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError));
     StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call));
-    StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
 
     //------------------------------------------------------------------------------------------------------------------------
     // entry points that are platform specific
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -2261,8 +2261,7 @@
     // The ckoff and ckval must be mutually consistent,
     // even though caller generates both.
     { Label L;
-      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                        Klass::super_check_offset_offset_in_bytes());
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
       __ cmpl(ckoff, Address(ckval, sco_offset));
       __ jcc(Assembler::equal, L);
       __ stop("super_check_offset inconsistent");
@@ -2572,8 +2571,7 @@
     //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
     //
 
-    const int lh_offset = klassOopDesc::header_size() * HeapWordSize +
-                          Klass::layout_helper_offset_in_bytes();
+    const int lh_offset = in_bytes(Klass::layout_helper_offset());
 
     // Handle objArrays completely differently...
     const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
@@ -2722,15 +2720,13 @@
       assert_clean_int(count, sco_temp);
 
       // Generate the type check.
-      const int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
-                              Klass::super_check_offset_offset_in_bytes());
+      const int sco_offset = in_bytes(Klass::super_check_offset_offset());
       __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
       assert_clean_int(sco_temp, rax);
       generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
 
       // Fetch destination element klass from the objArrayKlass header.
-      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
-                       objArrayKlass::element_klass_offset_in_bytes());
+      int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
       __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
       __ movl(  sco_temp,      Address(r11_dst_klass, sco_offset));
       assert_clean_int(sco_temp, rax);
@@ -2982,7 +2978,9 @@
     int frame_complete = __ pc() - start;
 
     // Set up last_Java_sp and last_Java_fp
-    __ set_last_Java_frame(rsp, rbp, NULL);
+    address the_pc = __ pc();
+    __ set_last_Java_frame(rsp, rbp, the_pc);
+    __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack
 
     // Call runtime
     if (arg1 != noreg) {
@@ -2999,9 +2997,9 @@
     // Generate oop map
     OopMap* map = new OopMap(framesize, 0);
 
-    oop_maps->add_gc_map(__ pc() - start, map);
-
-    __ reset_last_Java_frame(true, false);
+    oop_maps->add_gc_map(the_pc - start, map);
+
+    __ reset_last_Java_frame(true, true);
 
     __ leave(); // required for proper stackwalking of RuntimeStub frame
 
@@ -3072,6 +3070,13 @@
       generate_throw_exception("WrongMethodTypeException throw_exception",
                                CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
                                rax, rcx);
+
+    // Build this early so it's available for the interpreter.
+    StubRoutines::_throw_StackOverflowError_entry =
+      generate_throw_exception("StackOverflowError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_StackOverflowError));
   }
 
   void generate_all() {
@@ -3098,12 +3103,6 @@
                                                 SharedRuntime::
                                                 throw_NullPointerException_at_call));
 
-    StubRoutines::_throw_StackOverflowError_entry =
-      generate_throw_exception("StackOverflowError throw_exception",
-                               CAST_FROM_FN_PTR(address,
-                                                SharedRuntime::
-                                                throw_StackOverflowError));
-
     // entry points that are platform specific
     StubRoutines::x86::_f2i_fixup = generate_f2i_fixup();
     StubRoutines::x86::_f2l_fixup = generate_f2l_fixup();
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -522,9 +522,18 @@
 
   __ pop(rsi);  // get saved bcp / (c++ prev state ).
 
-  __ pop(rax);  // get return address
-  __ jump(ExternalAddress(Interpreter::throw_StackOverflowError_entry()));
+  // Restore sender's sp as SP. This is necessary if the sender's
+  // frame is an extended compiled frame (see gen_c2i_adapter())
+  // and safer anyway in case of JSR292 adaptations.
 
+  __ pop(rax); // return address must be moved if SP is changed
+  __ mov(rsp, rsi);
+  __ push(rax);
+
+  // Note: the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry()));
   // all done with frame size check
   __ bind(after_frame_check_pop);
   __ pop(rsi);
@@ -552,7 +561,7 @@
   #endif // ASSERT
   // get synchronization object
   { Label done;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(rax, access_flags);
     __ testl(rax, JVM_ACC_STATIC);
     __ movptr(rax, Address(rdi, Interpreter::local_offset_in_bytes(0)));  // get receiver (assume this is frequent case)
@@ -1012,7 +1021,7 @@
 
   // pass mirror handle if static call
   { Label L;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(t, Address(method, methodOopDesc::access_flags_offset()));
     __ testl(t, JVM_ACC_STATIC);
     __ jcc(Assembler::zero, L);
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -467,8 +467,18 @@
   __ cmpptr(rsp, rax);
   __ jcc(Assembler::above, after_frame_check);
 
-  __ pop(rax); // get return address
-  __ jump(ExternalAddress(Interpreter::throw_StackOverflowError_entry()));
+  // Restore sender's sp as SP. This is necessary if the sender's
+  // frame is an extended compiled frame (see gen_c2i_adapter())
+  // and safer anyway in case of JSR292 adaptations.
+
+  __ pop(rax); // return address must be moved if SP is changed
+  __ mov(rsp, r13);
+  __ push(rax);
+
+  // Note: the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry()));
 
   // all done with frame size check
   __ bind(after_frame_check);
@@ -505,8 +515,7 @@
 
   // get synchronization object
   {
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() +
-                              Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     Label done;
     __ movl(rax, access_flags);
     __ testl(rax, JVM_ACC_STATIC);
@@ -1006,8 +1015,7 @@
   // pass mirror handle if static call
   {
     Label L;
-    const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() +
-                              Klass::java_mirror_offset_in_bytes();
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
     __ movl(t, Address(method, methodOopDesc::access_flags_offset()));
     __ testl(t, JVM_ACC_STATIC);
     __ jcc(Assembler::zero, L);
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -980,7 +980,7 @@
   __ load_klass(rbx, rax);
   // Move superklass into EAX
   __ load_klass(rax, rdx);
-  __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
+  __ movptr(rax, Address(rax, objArrayKlass::element_klass_offset()));
   // Compress array+index*wordSize+12 into a single register.  Frees ECX.
   __ lea(rdx, element_address);
 
@@ -2033,7 +2033,7 @@
     assert(state == vtos, "only valid state");
     __ movptr(rax, aaddress(0));
     __ load_klass(rdi, rax);
-    __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+    __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
     Label skip_register_finalizer;
     __ jcc(Assembler::zero, skip_register_finalizer);
@@ -3188,11 +3188,11 @@
 
   // make sure klass is initialized & doesn't have finalizer
   // make sure klass is fully initialized
-  __ cmpl(Address(rcx, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized);
+  __ cmpb(Address(rcx, instanceKlass::init_state_offset()), instanceKlass::fully_initialized);
   __ jcc(Assembler::notEqual, slow_case);
 
   // get instance_size in instanceKlass (scaled to a count of bytes)
-  __ movl(rdx, Address(rcx, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+  __ movl(rdx, Address(rcx, Klass::layout_helper_offset()));
   // test to see if it has a finalizer or is malformed in some way
   __ testl(rdx, Klass::_lh_instance_slow_path_bit);
   __ jcc(Assembler::notZero, slow_case);
@@ -3293,7 +3293,7 @@
     __ bind(initialize_header);
     if (UseBiasedLocking) {
       __ pop(rcx);   // get saved klass back in the register.
-      __ movptr(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      __ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
     } else {
       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1004,8 +1004,7 @@
   // Move superklass into rax
   __ load_klass(rax, rdx);
   __ movptr(rax, Address(rax,
-                         sizeof(oopDesc) +
-                         objArrayKlass::element_klass_offset_in_bytes()));
+                         objArrayKlass::element_klass_offset()));
   // Compress array + index*oopSize + 12 into a single register.  Frees rcx.
   __ lea(rdx, element_address);
 
@@ -2067,7 +2066,7 @@
     assert(state == vtos, "only valid state");
     __ movptr(c_rarg1, aaddress(0));
     __ load_klass(rdi, c_rarg1);
-    __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
+    __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
     Label skip_register_finalizer;
     __ jcc(Assembler::zero, skip_register_finalizer);
@@ -3235,16 +3234,15 @@
 
   // make sure klass is initialized & doesn't have finalizer
   // make sure klass is fully initialized
-  __ cmpl(Address(rsi,
-                  instanceKlass::init_state_offset_in_bytes() +
-                  sizeof(oopDesc)),
+  __ cmpb(Address(rsi,
+                  instanceKlass::init_state_offset()),
           instanceKlass::fully_initialized);
   __ jcc(Assembler::notEqual, slow_case);
 
   // get instance_size in instanceKlass (scaled to a count of bytes)
   __ movl(rdx,
           Address(rsi,
-                  Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
+                  Klass::layout_helper_offset()));
   // test to see if it has a finalizer or is malformed in some way
   __ testl(rdx, Klass::_lh_instance_slow_path_bit);
   __ jcc(Assembler::notZero, slow_case);
@@ -3337,7 +3335,7 @@
     // initialize object header only.
     __ bind(initialize_header);
     if (UseBiasedLocking) {
-      __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+      __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset()));
       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1);
     } else {
       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()),
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,7 +50,7 @@
 VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
 
 static BufferBlob* stub_blob;
-static const int stub_size = 400;
+static const int stub_size = 550;
 
 extern "C" {
   typedef void (*getPsrInfo_stub_t)(void*);
@@ -65,15 +65,15 @@
 
   address generate_getPsrInfo() {
     // Flags to test CPU type.
-    const uint32_t EFL_AC           = 0x40000;
-    const uint32_t EFL_ID           = 0x200000;
+    const uint32_t HS_EFL_AC           = 0x40000;
+    const uint32_t HS_EFL_ID           = 0x200000;
     // Values for when we don't have a CPUID instruction.
     const int      CPU_FAMILY_SHIFT = 8;
     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 
     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
-    Label ext_cpuid1, ext_cpuid5, done;
+    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done;
 
     StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
 #   define __ _masm->
@@ -100,7 +100,7 @@
     //
     // if we are unable to change the AC flag, we have a 386
     //
-    __ xorl(rax, EFL_AC);
+    __ xorl(rax, HS_EFL_AC);
     __ push(rax);
     __ popf();
     __ pushf();
@@ -118,7 +118,7 @@
     //
     __ bind(detect_486);
     __ mov(rax, rcx);
-    __ xorl(rax, EFL_ID);
+    __ xorl(rax, HS_EFL_ID);
     __ push(rax);
     __ popf();
     __ pushf();
@@ -229,14 +229,51 @@
     __ movl(Address(rsi, 8), rcx);
     __ movl(Address(rsi,12), rdx);
 
+    //
+    // Check if OS has enabled XGETBV instruction to access XCR0 (OSXSAVE
+    // feature flag, mask bit 27) and CPU supports AVX (mask bit 28)
+    //
+    __ andl(rcx, 0x18000000);
+    __ cmpl(rcx, 0x18000000);
+    __ jccb(Assembler::notEqual, sef_cpuid);
+
+    //
+    // XCR0, XFEATURE_ENABLED_MASK register
+    //
+    __ xorl(rcx, rcx);   // zero for XCR0 register
+    __ xgetbv();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rdx);
+
+    //
+    // cpuid(0x7) Structured Extended Features
+    //
+    __ bind(sef_cpuid);
+    __ movl(rax, 7);
+    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
+    __ jccb(Assembler::greater, ext_cpuid);
+
+    __ xorl(rcx, rcx);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+
+    //
+    // Extended cpuid(0x80000000)
+    //
+    __ bind(ext_cpuid);
     __ movl(rax, 0x80000000);
     __ cpuid();
     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
     __ jcc(Assembler::belowEqual, done);
     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid1);
+    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
+    __ jccb(Assembler::belowEqual, ext_cpuid5);
     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
-    __ jccb(Assembler::belowEqual, ext_cpuid5);
+    __ jccb(Assembler::belowEqual, ext_cpuid7);
     //
     // Extended cpuid(0x80000008)
     //
@@ -249,6 +286,18 @@
     __ movl(Address(rsi,12), rdx);
 
     //
+    // Extended cpuid(0x80000007)
+    //
+    __ bind(ext_cpuid7);
+    __ movl(rax, 0x80000007);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    //
     // Extended cpuid(0x80000005)
     //
     __ bind(ext_cpuid5);
@@ -359,13 +408,19 @@
   if (UseSSE < 1)
     _cpuFeatures &= ~CPU_SSE;
 
+  if (UseAVX < 2)
+    _cpuFeatures &= ~CPU_AVX2;
+
+  if (UseAVX < 1)
+    _cpuFeatures &= ~CPU_AVX;
+
   if (logical_processors_per_package() == 1) {
     // HT processor could be installed on a system which doesn't support HT.
     _cpuFeatures &= ~CPU_HT;
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -379,27 +434,39 @@
                (supports_sse4_1() ? ", sse4.1" : ""),
                (supports_sse4_2() ? ", sse4.2" : ""),
                (supports_popcnt() ? ", popcnt" : ""),
+               (supports_avx()    ? ", avx" : ""),
+               (supports_avx2()   ? ", avx2" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
                (supports_lzcnt()   ? ", lzcnt": ""),
                (supports_sse4a()   ? ", sse4a": ""),
-               (supports_ht() ? ", ht": ""));
+               (supports_ht() ? ", ht": ""),
+               (supports_tsc() ? ", tsc": ""),
+               (supports_tscinv_bit() ? ", tscinvbit": ""),
+               (supports_tscinv() ? ", tscinv": ""));
   _features_str = strdup(buf);
 
   // UseSSE is set to the smaller of what hardware supports and what
   // the command line requires.  I.e., you cannot set UseSSE to 2 on
   // older Pentiums which do not support it.
-  if( UseSSE > 4 ) UseSSE=4;
-  if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
+  if (UseSSE > 4) UseSSE=4;
+  if (UseSSE < 0) UseSSE=0;
+  if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
-  if( !supports_sse3() ) // Drop to 2 if no SSE3 support
+  if (!supports_sse3()) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
-  if( !supports_sse2() ) // Drop to 1 if no SSE2 support
+  if (!supports_sse2()) // Drop to 1 if no SSE2 support
     UseSSE = MIN2((intx)1,UseSSE);
-  if( !supports_sse () ) // Drop to 0 if no SSE  support
+  if (!supports_sse ()) // Drop to 0 if no SSE  support
     UseSSE = 0;
 
+  if (UseAVX > 2) UseAVX=2;
+  if (UseAVX < 0) UseAVX=0;
+  if (!supports_avx2()) // Drop to 1 if no AVX2 support
+    UseAVX = MIN2((intx)1,UseAVX);
+  if (!supports_avx ()) // Drop to 0 if no AVX  support
+    UseAVX = 0;
+
   // On new cpus instructions which update whole XMM register should be used
   // to prevent partial register stall due to dependencies on high half.
   //
@@ -534,6 +601,9 @@
     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
       UsePopCountInstruction = true;
     }
+  } else if (UsePopCountInstruction) {
+    warning("POPCNT instruction is not available on this CPU");
+    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
   }
 
 #ifdef COMPILER2
@@ -605,7 +675,11 @@
   if (PrintMiscellaneous && Verbose) {
     tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
-    tty->print_cr("UseSSE=%d",UseSSE);
+    tty->print("UseSSE=%d",UseSSE);
+    if (UseAVX > 0) {
+      tty->print("  UseAVX=%d",UseAVX);
+    }
+    tty->cr();
     tty->print("Allocation");
     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
       tty->print_cr(": no prefetching");
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -78,7 +78,10 @@
                sse4_2   : 1,
                         : 2,
                popcnt   : 1,
-                        : 8;
+                        : 3,
+               osxsave  : 1,
+               avx      : 1,
+                        : 3;
     } bits;
   };
 
@@ -168,6 +171,15 @@
     } bits;
   };
 
+  union ExtCpuid7Edx {
+    uint32_t value;
+    struct {
+      uint32_t               : 8,
+              tsc_invariance : 1,
+                             : 23;
+    } bits;
+  };
+
   union ExtCpuid8Ecx {
     uint32_t value;
     struct {
@@ -176,32 +188,80 @@
     } bits;
   };
 
+  union SefCpuid7Eax {
+    uint32_t value;
+  };
+
+  union SefCpuid7Ebx {
+    uint32_t value;
+    struct {
+      uint32_t fsgsbase : 1,
+                        : 2,
+                   bmi1 : 1,
+                        : 1,
+                   avx2 : 1,
+                        : 2,
+                   bmi2 : 1,
+                        : 23;
+    } bits;
+  };
+
+  union XemXcr0Eax {
+    uint32_t value;
+    struct {
+      uint32_t x87 : 1,
+               sse : 1,
+               ymm : 1,
+                   : 29;
+    } bits;
+  };
+
 protected:
-   static int _cpu;
-   static int _model;
-   static int _stepping;
-   static int _cpuFeatures;     // features returned by the "cpuid" instruction
-                                // 0 if this instruction is not available
-   static const char* _features_str;
+  static int _cpu;
+  static int _model;
+  static int _stepping;
+  static int _cpuFeatures;     // features returned by the "cpuid" instruction
+                               // 0 if this instruction is not available
+  static const char* _features_str;
 
-   enum {
-     CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
-     CPU_CMOV   = (1 << 1),
-     CPU_FXSR   = (1 << 2),
-     CPU_HT     = (1 << 3),
-     CPU_MMX    = (1 << 4),
-     CPU_3DNOW_PREFETCH  = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
-                                     // may not necessarily support other 3dnow instructions
-     CPU_SSE    = (1 << 6),
-     CPU_SSE2   = (1 << 7),
-     CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
-     CPU_SSSE3  = (1 << 9),
-     CPU_SSE4A  = (1 << 10),
-     CPU_SSE4_1 = (1 << 11),
-     CPU_SSE4_2 = (1 << 12),
-     CPU_POPCNT = (1 << 13),
-     CPU_LZCNT  = (1 << 14)
-   } cpuFeatureFlags;
+  enum {
+    CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
+    CPU_CMOV   = (1 << 1),
+    CPU_FXSR   = (1 << 2),
+    CPU_HT     = (1 << 3),
+    CPU_MMX    = (1 << 4),
+    CPU_3DNOW_PREFETCH  = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
+                                    // may not necessarily support other 3dnow instructions
+    CPU_SSE    = (1 << 6),
+    CPU_SSE2   = (1 << 7),
+    CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
+    CPU_SSSE3  = (1 << 9),
+    CPU_SSE4A  = (1 << 10),
+    CPU_SSE4_1 = (1 << 11),
+    CPU_SSE4_2 = (1 << 12),
+    CPU_POPCNT = (1 << 13),
+    CPU_LZCNT  = (1 << 14),
+    CPU_TSC    = (1 << 15),
+    CPU_TSCINV = (1 << 16),
+    CPU_AVX    = (1 << 17),
+    CPU_AVX2   = (1 << 18)
+  } cpuFeatureFlags;
+
+  enum {
+    // AMD
+    CPU_FAMILY_AMD_11H       = 0x11,
+    // Intel
+    CPU_FAMILY_INTEL_CORE    = 6,
+    CPU_MODEL_NEHALEM        = 0x1e,
+    CPU_MODEL_NEHALEM_EP     = 0x1a,
+    CPU_MODEL_NEHALEM_EX     = 0x2e,
+    CPU_MODEL_WESTMERE       = 0x25,
+    CPU_MODEL_WESTMERE_EP    = 0x2c,
+    CPU_MODEL_WESTMERE_EX    = 0x2f,
+    CPU_MODEL_SANDYBRIDGE    = 0x2a,
+    CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
+    CPU_MODEL_IVYBRIDGE_EP   = 0x3a
+  } cpuExtendedFamily;
 
   // cpuid information block.  All info derived from executing cpuid with
   // various function numbers is stored here.  Intel and AMD info is
@@ -228,6 +288,12 @@
     uint32_t     dcp_cpuid4_ecx; // unused currently
     uint32_t     dcp_cpuid4_edx; // unused currently
 
+    // cpuid function 7 (structured extended features)
+    SefCpuid7Eax sef_cpuid7_eax;
+    SefCpuid7Ebx sef_cpuid7_ebx;
+    uint32_t     sef_cpuid7_ecx; // unused currently
+    uint32_t     sef_cpuid7_edx; // unused currently
+
     // cpuid function 0xB (processor topology)
     // ecx = 0
     uint32_t     tpl_cpuidB0_eax;
@@ -264,17 +330,27 @@
     uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
     uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
 
-    // cpuid function 0x80000005 //AMD L1, Intel reserved
+    // cpuid function 0x80000005 // AMD L1, Intel reserved
     uint32_t     ext_cpuid5_eax; // unused currently
     uint32_t     ext_cpuid5_ebx; // reserved
     ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
     ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
 
+    // cpuid function 0x80000007
+    uint32_t     ext_cpuid7_eax; // reserved
+    uint32_t     ext_cpuid7_ebx; // reserved
+    uint32_t     ext_cpuid7_ecx; // reserved
+    ExtCpuid7Edx ext_cpuid7_edx; // tscinv
+
     // cpuid function 0x80000008
     uint32_t     ext_cpuid8_eax; // unused currently
     uint32_t     ext_cpuid8_ebx; // reserved
     ExtCpuid8Ecx ext_cpuid8_ecx;
     uint32_t     ext_cpuid8_edx; // reserved
+
+    // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
+    XemXcr0Eax   xem_xcr0_eax;
+    uint32_t     xem_xcr0_edx; // reserved
   };
 
   // The actual cpuid info block
@@ -286,19 +362,23 @@
     result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
     return result;
   }
+
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
     result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
     return result;
   }
+
   static uint32_t cpu_stepping() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
     return result;
   }
+
   static uint logical_processor_count() {
     uint result = threads_per_core();
     return result;
   }
+
   static uint32_t feature_flags() {
     uint32_t result = 0;
     if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
@@ -328,6 +408,18 @@
       result |= CPU_SSE4_2;
     if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
       result |= CPU_POPCNT;
+    if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
+        _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
+        _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
+        _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
+      result |= CPU_AVX;
+      if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
+        result |= CPU_AVX2;
+    }
+    if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
+      result |= CPU_TSC;
+    if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
+      result |= CPU_TSCINV;
 
     // AMD features.
     if (is_amd()) {
@@ -350,12 +442,15 @@
   static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
   static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
   static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
+  static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
   static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
   static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
+  static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
   static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
   static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
   static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
   static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
+  static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
 
   // Initialization
   static void initialize();
@@ -382,7 +477,6 @@
   //
   static int  cpu_family()        { return _cpu;}
   static bool is_P6()             { return cpu_family() >= 6; }
-
   static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
   static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
 
@@ -447,14 +541,51 @@
   static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
-  //
+  static bool supports_avx()      { return (_cpuFeatures & CPU_AVX) != 0; }
+  static bool supports_avx2()     { return (_cpuFeatures & CPU_AVX2) != 0; }
+  static bool supports_tsc()      { return (_cpuFeatures & CPU_TSC)    != 0; }
+
+  // Intel features
+  static bool is_intel_family_core() { return is_intel() &&
+                                       extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
+
+  static bool is_intel_tsc_synched_at_init()  {
+    if (is_intel_family_core()) {
+      uint32_t ext_model = extended_cpu_model();
+      if (ext_model == CPU_MODEL_NEHALEM_EP     ||
+          ext_model == CPU_MODEL_WESTMERE_EP    ||
+          ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
+          ext_model == CPU_MODEL_IVYBRIDGE_EP) {
+        // EP versions: invariant tsc support in <= 2-socket systems. EX
+        // versions are usually used in > 2-socket systems and likely don't
+        // synchronize tscs at initialization.
+        // Code that uses tsc values must be prepared for them to arbitrarily
+        // jump forward or backward.
+        return true;
+      }
+    }
+    return false;
+  }
+
   // AMD features
-  //
   static bool supports_3dnow_prefetch()    { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; }
   static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
   static bool supports_lzcnt()    { return (_cpuFeatures & CPU_LZCNT) != 0; }
   static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
 
+  static bool is_amd_Barcelona()  { return is_amd() &&
+                                           extended_cpu_family() == CPU_FAMILY_AMD_11H; }
+
+  // Newer Intel and AMD cores support fast timestamps well
+  static bool supports_tscinv_bit() {
+    return (_cpuFeatures & CPU_TSCINV) != 0;
+  }
+  static bool supports_tscinv() {
+    return supports_tscinv_bit() &&
+           ( (is_amd() && !is_amd_Barcelona()) ||
+             is_intel_tsc_synched_at_init() );
+  }
+
   // Intel Core and newer cpus have fast IDIV instruction (excluding Atom).
   static bool has_fast_idiv()     { return is_intel() && cpu_family() == 6 &&
                                            supports_sse3() && _model != 0x1C; }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/x86.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,854 @@
+//
+// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// X86 Common Architecture Description File
+
+source %{
+  // Float masks come from different places depending on platform.
+#ifdef _LP64
+  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
+  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
+  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
+  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
+#else
+  static address float_signmask()  { return (address)float_signmask_pool; }
+  static address float_signflip()  { return (address)float_signflip_pool; }
+  static address double_signmask() { return (address)double_signmask_pool; }
+  static address double_signflip() { return (address)double_signflip_pool; }
+#endif
+
+#ifndef PRODUCT
+  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
+    st->print("nop \t# %d bytes pad for loops and calls", _count);
+  }
+#endif
+
+  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
+    MacroAssembler _masm(&cbuf);
+    __ nop(_count);
+  }
+
+  uint MachNopNode::size(PhaseRegAlloc*) const {
+    return _count;
+  }
+
+#ifndef PRODUCT
+  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
+    st->print("# breakpoint");
+  }
+#endif
+
+  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
+    MacroAssembler _masm(&cbuf);
+    __ int3();
+  }
+
+  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
+    return MachNode::size(ra_);
+  }
+
+%}
+
+encode %{
+
+  enc_class preserve_SP %{
+    debug_only(int off0 = cbuf.insts_size());
+    MacroAssembler _masm(&cbuf);
+    // RBP is preserved across all calls, even compiled calls.
+    // Use it to preserve RSP in places where the callee might change the SP.
+    __ movptr(rbp_mh_SP_save, rsp);
+    debug_only(int off1 = cbuf.insts_size());
+    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
+  %}
+
+  enc_class restore_SP %{
+    MacroAssembler _masm(&cbuf);
+    __ movptr(rsp, rbp_mh_SP_save);
+  %}
+
+  enc_class call_epilog %{
+    if (VerifyStackAtCalls) {
+      // Check that stack depth is unchanged: find magic cookie on stack
+      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
+      MacroAssembler _masm(&cbuf);
+      Label L;
+      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
+      __ jccb(Assembler::equal, L);
+      // Die if stack mismatch
+      __ int3();
+      __ bind(L);
+    }
+  %}
+
+%}
+
+// INSTRUCTIONS -- Platform-independent definitions (same for 32- and 64-bit)
+
+// ============================================================================
+
+instruct ShouldNotReachHere() %{
+  match(Halt);
+  format %{ "int3\t# ShouldNotReachHere" %}
+  ins_encode %{
+    __ int3();
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ============================================================================
+
+instruct addF_reg(regF dst, regF src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (AddF dst src));
+
+  format %{ "addss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addF_mem(regF dst, memory src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (AddF dst (LoadF src)));
+
+  format %{ "addss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addF_imm(regF dst, immF con) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (AddF dst con));
+  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddF_reg(regF dst, regF src1, regF src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddF src1 src2));
+
+  format %{ "vaddss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddF_mem(regF dst, regF src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddF src1 (LoadF src2)));
+
+  format %{ "vaddss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddF_imm(regF dst, regF src, immF con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddF src con));
+
+  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addD_reg(regD dst, regD src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (AddD dst src));
+
+  format %{ "addsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addD_mem(regD dst, memory src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (AddD dst (LoadD src)));
+
+  format %{ "addsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct addD_imm(regD dst, immD con) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (AddD dst con));
+  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ addsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddD_reg(regD dst, regD src1, regD src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddD src1 src2));
+
+  format %{ "vaddsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddD_mem(regD dst, regD src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddD src1 (LoadD src2)));
+
+  format %{ "vaddsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vaddD_imm(regD dst, regD src, immD con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AddD src con));
+
+  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subF_reg(regF dst, regF src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (SubF dst src));
+
+  format %{ "subss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subF_mem(regF dst, memory src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (SubF dst (LoadF src)));
+
+  format %{ "subss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subF_imm(regF dst, immF con) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (SubF dst con));
+  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubF_reg(regF dst, regF src1, regF src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubF src1 src2));
+
+  format %{ "vsubss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubF_mem(regF dst, regF src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubF src1 (LoadF src2)));
+
+  format %{ "vsubss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubF_imm(regF dst, regF src, immF con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubF src con));
+
+  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subD_reg(regD dst, regD src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (SubD dst src));
+
+  format %{ "subsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subD_mem(regD dst, memory src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (SubD dst (LoadD src)));
+
+  format %{ "subsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct subD_imm(regD dst, immD con) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (SubD dst con));
+  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ subsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubD_reg(regD dst, regD src1, regD src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubD src1 src2));
+
+  format %{ "vsubsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubD_mem(regD dst, regD src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubD src1 (LoadD src2)));
+
+  format %{ "vsubsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsubD_imm(regD dst, regD src, immD con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (SubD src con));
+
+  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulF_reg(regF dst, regF src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (MulF dst src));
+
+  format %{ "mulss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulF_mem(regF dst, memory src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (MulF dst (LoadF src)));
+
+  format %{ "mulss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulF_imm(regF dst, immF con) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (MulF dst con));
+  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulF_reg(regF dst, regF src1, regF src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulF src1 src2));
+
+  format %{ "vmulss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulF_mem(regF dst, regF src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulF src1 (LoadF src2)));
+
+  format %{ "vmulss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulF_imm(regF dst, regF src, immF con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulF src con));
+
+  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulD_reg(regD dst, regD src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (MulD dst src));
+
+  format %{ "mulsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulD_mem(regD dst, memory src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (MulD dst (LoadD src)));
+
+  format %{ "mulsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct mulD_imm(regD dst, immD con) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (MulD dst con));
+  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ mulsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulD_reg(regD dst, regD src1, regD src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulD src1 src2));
+
+  format %{ "vmulsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulD_mem(regD dst, regD src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulD src1 (LoadD src2)));
+
+  format %{ "vmulsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vmulD_imm(regD dst, regD src, immD con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (MulD src con));
+
+  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divF_reg(regF dst, regF src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (DivF dst src));
+
+  format %{ "divss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divF_mem(regF dst, memory src) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (DivF dst (LoadF src)));
+
+  format %{ "divss   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divF_imm(regF dst, immF con) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (DivF dst con));
+  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivF_reg(regF dst, regF src1, regF src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivF src1 src2));
+
+  format %{ "vdivss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivF_mem(regF dst, regF src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivF src1 (LoadF src2)));
+
+  format %{ "vdivss  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivF_imm(regF dst, regF src, immF con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivF src con));
+
+  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divD_reg(regD dst, regD src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (DivD dst src));
+
+  format %{ "divsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divD_mem(regD dst, memory src) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (DivD dst (LoadD src)));
+
+  format %{ "divsd   $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct divD_imm(regD dst, immD con) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (DivD dst con));
+  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ divsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivD_reg(regD dst, regD src1, regD src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivD src1 src2));
+
+  format %{ "vdivsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivD_mem(regD dst, regD src1, memory src2) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivD src1 (LoadD src2)));
+
+  format %{ "vdivsd  $dst, $src1, $src2" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vdivD_imm(regD dst, regD src, immD con) %{
+  predicate(UseAVX > 0);
+  match(Set dst (DivD src con));
+
+  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct absF_reg(regF dst) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (AbsF dst));
+  ins_cost(150);
+  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vabsF_reg(regF dst, regF src) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AbsF src));
+  ins_cost(150);
+  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
+  ins_encode %{
+    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(float_signmask()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct absD_reg(regD dst) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (AbsD dst));
+  ins_cost(150);
+  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
+            "# abs double by sign masking" %}
+  ins_encode %{
+    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vabsD_reg(regD dst, regD src) %{
+  predicate(UseAVX > 0);
+  match(Set dst (AbsD src));
+  ins_cost(150);
+  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
+            "# abs double by sign masking" %}
+  ins_encode %{
+    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(double_signmask()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct negF_reg(regF dst) %{
+  predicate((UseSSE>=1) && (UseAVX == 0));
+  match(Set dst (NegF dst));
+  ins_cost(150);
+  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vnegF_reg(regF dst, regF src) %{
+  predicate(UseAVX > 0);
+  match(Set dst (NegF src));
+  ins_cost(150);
+  format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
+  ins_encode %{
+    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(float_signflip()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct negD_reg(regD dst) %{
+  predicate((UseSSE>=2) && (UseAVX == 0));
+  match(Set dst (NegD dst));
+  ins_cost(150);
+  format %{ "xorpd   $dst, [0x8000000000000000]\t"
+            "# neg double by sign flipping" %}
+  ins_encode %{
+    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vnegD_reg(regD dst, regD src) %{
+  predicate(UseAVX > 0);
+  match(Set dst (NegD src));
+  ins_cost(150);
+  format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
+            "# neg double by sign flipping" %}
+  ins_encode %{
+    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
+              ExternalAddress(double_signflip()));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtF_reg(regF dst, regF src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+
+  format %{ "sqrtss  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtF_mem(regF dst, memory src) %{
+  predicate(UseSSE>=1);
+  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
+
+  format %{ "sqrtss  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtF_imm(regF dst, immF con) %{
+  predicate(UseSSE>=1);
+  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
+  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtD_reg(regD dst, regD src) %{
+  predicate(UseSSE>=2);
+  match(Set dst (SqrtD src));
+
+  format %{ "sqrtsd  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtD_mem(regD dst, memory src) %{
+  predicate(UseSSE>=2);
+  match(Set dst (SqrtD (LoadD src)));
+
+  format %{ "sqrtsd  $dst, $src" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $src$$Address);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct sqrtD_imm(regD dst, immD con) %{
+  predicate(UseSSE>=2);
+  match(Set dst (SqrtD con));
+  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
+  ins_cost(150);
+  ins_encode %{
+    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
--- a/src/cpu/x86/vm/x86_32.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/x86_32.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -281,7 +281,7 @@
 }
 
 static int preserve_SP_size() {
-  return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
+  return 2;  // op, rm(reg/reg)
 }
 
 // !!!!! Special hack to get all type of calls to specify the byte offset
@@ -341,12 +341,6 @@
   return round_to(current_offset, alignment_required()) - current_offset;
 }
 
-#ifndef PRODUCT
-void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
-  st->print("INT3");
-}
-#endif
-
 // EMIT_RM()
 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
@@ -495,14 +489,34 @@
   }
 }
 
-void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
-  if( dst_encoding == src_encoding ) {
-    // reg-reg copy, use an empty encoding
-  } else {
-    MacroAssembler _masm(&cbuf);
-
-    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
-  }
+void emit_cmpfp_fixup(MacroAssembler& _masm) {
+  Label exit;
+  __ jccb(Assembler::noParity, exit);
+  __ pushf();
+  //
+  // comiss/ucomiss instructions set ZF,PF,CF flags and
+  // zero OF,AF,SF for NaN values.
+  // Fixup flags by zeroing ZF,PF so that compare of NaN
+  // values returns 'less than' result (CF is set).
+  // Leave the rest of flags unchanged.
+  //
+  //    7 6 5 4 3 2 1 0
+  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
+  //    0 0 1 0 1 0 1 1   (0x2B)
+  //
+  __ andl(Address(rsp, 0), 0xffffff2b);
+  __ popf();
+  __ bind(exit);
+}
+
+void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
+  Label done;
+  __ movl(dst, -1);
+  __ jcc(Assembler::parity, done);
+  __ jcc(Assembler::below, done);
+  __ setb(Assembler::notEqual, dst);
+  __ movzbl(dst, dst);
+  __ bind(done);
 }
 
 
@@ -530,118 +544,66 @@
 
 //=============================================================================
 #ifndef PRODUCT
-void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
+void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
   Compile* C = ra_->C;
-  if( C->in_24_bit_fp_mode() ) {
-    st->print("FLDCW  24 bit fpu control word");
-    st->print_cr(""); st->print("\t");
-  }
 
   int framesize = C->frame_slots() << LogBytesPerInt;
   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove two words for return addr and rbp,
-  framesize -= 2*wordSize;
-
-  // Calls to C2R adapters often do not accept exceptional returns.
-  // We require that their callers must bang for them.  But be careful, because
-  // some VM calls (such as call site linkage) can use several kilobytes of
-  // stack.  But the stack safety zone should account for that.
-  // See bugs 4446381, 4468289, 4497237.
+  // Remove wordSize for return addr which is already pushed.
+  framesize -= wordSize;
+
   if (C->need_stack_bang(framesize)) {
-    st->print_cr("# stack bang"); st->print("\t");
-  }
-  st->print_cr("PUSHL  EBP"); st->print("\t");
-
-  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
-    st->print("PUSH   0xBADB100D\t# Majik cookie for stack depth check");
-    st->print_cr(""); st->print("\t");
     framesize -= wordSize;
-  }
-
-  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
+    st->print("# stack bang");
+    st->print("\n\t");
+    st->print("PUSH   EBP\t# Save EBP");
     if (framesize) {
-      st->print("SUB    ESP,%d\t# Create frame",framesize);
+      st->print("\n\t");
+      st->print("SUB    ESP, #%d\t# Create frame",framesize);
     }
   } else {
-    st->print("SUB    ESP,%d\t# Create frame",framesize);
+    st->print("SUB    ESP, #%d\t# Create frame",framesize);
+    st->print("\n\t");
+    framesize -= wordSize;
+    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
+  }
+
+  if (VerifyStackAtCalls) {
+    st->print("\n\t");
+    framesize -= wordSize;
+    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
   }
+
+  if( C->in_24_bit_fp_mode() ) {
+    st->print("\n\t");
+    st->print("FLDCW  \t# load 24 bit fpu control word");
+  }
+  if (UseSSE >= 2 && VerifyFPU) {
+    st->print("\n\t");
+    st->print("# verify FPU stack (must be clean on entry)");
+  }
+
+#ifdef ASSERT
+  if (VerifyStackAtCalls) {
+    st->print("\n\t");
+    st->print("# stack alignment check");
+  }
+#endif
+  st->cr();
 }
 #endif
 
 
 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-
-  if (UseSSE >= 2 && VerifyFPU) {
-    MacroAssembler masm(&cbuf);
-    masm.verify_FPU(0, "FPU stack must be clean on entry");
-  }
-
-  // WARNING: Initial instruction MUST be 5 bytes or longer so that
-  // NativeJump::patch_verified_entry will be able to patch out the entry
-  // code safely. The fldcw is ok at 6 bytes, the push to verify stack
-  // depth is ok at 5 bytes, the frame allocation can be either 3 or
-  // 6 bytes. So if we don't do the fldcw or the push then we must
-  // use the 6 byte frame allocation even if we have no frame. :-(
-  // If method sets FPU control word do it now
-  if( C->in_24_bit_fp_mode() ) {
-    MacroAssembler masm(&cbuf);
-    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
-  }
+  MacroAssembler _masm(&cbuf);
 
   int framesize = C->frame_slots() << LogBytesPerInt;
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove two words for return addr and rbp,
-  framesize -= 2*wordSize;
-
-  // Calls to C2R adapters often do not accept exceptional returns.
-  // We require that their callers must bang for them.  But be careful, because
-  // some VM calls (such as call site linkage) can use several kilobytes of
-  // stack.  But the stack safety zone should account for that.
-  // See bugs 4446381, 4468289, 4497237.
-  if (C->need_stack_bang(framesize)) {
-    MacroAssembler masm(&cbuf);
-    masm.generate_stack_overflow_check(framesize);
-  }
-
-  // We always push rbp, so that on return to interpreter rbp, will be
-  // restored correctly and we can correct the stack.
-  emit_opcode(cbuf, 0x50 | EBP_enc);
-
-  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
-    emit_opcode(cbuf, 0x68); // push 0xbadb100d
-    emit_d32(cbuf, 0xbadb100d);
-    framesize -= wordSize;
-  }
-
-  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
-    if (framesize) {
-      emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
-      emit_rm(cbuf, 0x3, 0x05, ESP_enc);
-      emit_d8(cbuf, framesize);
-    }
-  } else {
-    emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
-    emit_rm(cbuf, 0x3, 0x05, ESP_enc);
-    emit_d32(cbuf, framesize);
-  }
+
+  __ verified_entry(framesize, C->need_stack_bang(framesize), C->in_24_bit_fp_mode());
+
   C->set_frame_complete(cbuf.insts_size());
 
-#ifdef ASSERT
-  if (VerifyStackAtCalls) {
-    Label L;
-    MacroAssembler masm(&cbuf);
-    masm.push(rax);
-    masm.mov(rax, rsp);
-    masm.andptr(rax, StackAlignmentInBytes-1);
-    masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
-    masm.pop(rax);
-    masm.jcc(Assembler::equal, L);
-    masm.stop("Stack is not properly aligned!");
-    masm.bind(L);
-  }
-#endif
-
   if (C->has_mach_constant_base_node()) {
     // NOTE: We set the table base offset here because users might be
     // emitted before MachConstantBaseNode.
@@ -792,92 +754,88 @@
 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
-  if( cbuf ) {
-    if( reg_lo+1 == reg_hi ) { // double move?
-      if( is_load && !UseXmmLoadAndClearUpper )
-        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
-      else
-        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    if (reg_lo+1 == reg_hi) { // double move?
+      if (is_load) {
+        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
+      } else {
+        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
+      }
     } else {
-      emit_opcode(*cbuf, 0xF3 );
+      if (is_load) {
+        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
+      } else {
+        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
+      }
     }
-    emit_opcode(*cbuf, 0x0F );
-    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
-      emit_opcode(*cbuf, 0x12 );   // use 'movlpd' for load
-    else
-      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
-    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
 #ifndef PRODUCT
-  } else if( !do_size ) {
-    if( size != 0 ) st->print("\n\t");
-    if( reg_lo+1 == reg_hi ) { // double move?
-      if( is_load ) st->print("%s %s,[ESP + #%d]",
-                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
-                               Matcher::regName[reg_lo], offset);
-      else          st->print("MOVSD  [ESP + #%d],%s",
-                               offset, Matcher::regName[reg_lo]);
+  } else if (!do_size) {
+    if (size != 0) st->print("\n\t");
+    if (reg_lo+1 == reg_hi) { // double move?
+      if (is_load) st->print("%s %s,[ESP + #%d]",
+                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
+                              Matcher::regName[reg_lo], offset);
+      else         st->print("MOVSD  [ESP + #%d],%s",
+                              offset, Matcher::regName[reg_lo]);
     } else {
-      if( is_load ) st->print("MOVSS  %s,[ESP + #%d]",
-                               Matcher::regName[reg_lo], offset);
-      else          st->print("MOVSS  [ESP + #%d],%s",
-                               offset, Matcher::regName[reg_lo]);
+      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
+                              Matcher::regName[reg_lo], offset);
+      else         st->print("MOVSS  [ESP + #%d],%s",
+                              offset, Matcher::regName[reg_lo]);
     }
 #endif
   }
   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
+  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
   return size+5+offset_size;
 }
 
 
 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
-  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
-    if( cbuf ) {
-      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
-        emit_opcode(*cbuf, 0x66 );
-      }
-      emit_opcode(*cbuf, 0x0F );
-      emit_opcode(*cbuf, 0x28 );
-      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
+      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
+                as_XMMRegister(Matcher::_regEncode[src_lo]));
+    } else {
+      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
+                as_XMMRegister(Matcher::_regEncode[src_lo]));
+    }
 #ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
-      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
+  } else if (!do_size) {
+    if (size != 0) st->print("\n\t");
+    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
+      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
       } else {
         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
       }
-#endif
-    }
-    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
-  } else {
-    if( cbuf ) {
-      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
-      emit_opcode(*cbuf, 0x0F );
-      emit_opcode(*cbuf, 0x10 );
-      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
-#ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
+    } else {
       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
       } else {
         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
       }
+    }
 #endif
-    }
-    return size+4;
   }
+  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
+  // Only MOVAPS SSE prefix uses 1 byte.
+  int sz = 4;
+  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
+      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
+  return size + sz;
 }
 
 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
   // 32-bit
   if (cbuf) {
-    emit_opcode(*cbuf, 0x66);
-    emit_opcode(*cbuf, 0x0F);
-    emit_opcode(*cbuf, 0x6E);
-    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
+    MacroAssembler _masm(cbuf);
+    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
+             as_Register(Matcher::_regEncode[src_lo]));
 #ifndef PRODUCT
   } else if (!do_size) {
     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
@@ -891,10 +849,9 @@
                                  int src_hi, int dst_hi, int size, outputStream* st ) {
   // 32-bit
   if (cbuf) {
-    emit_opcode(*cbuf, 0x66);
-    emit_opcode(*cbuf, 0x0F);
-    emit_opcode(*cbuf, 0x7E);
-    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
+    MacroAssembler _masm(cbuf);
+    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
+             as_XMMRegister(Matcher::_regEncode[src_lo]));
 #ifndef PRODUCT
   } else if (!do_size) {
     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
@@ -1154,7 +1111,7 @@
 }
 
 #ifndef PRODUCT
-void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
   implementation( NULL, ra_, false, st );
 }
 #endif
@@ -1167,22 +1124,6 @@
   return implementation( NULL, ra_, true, NULL );
 }
 
-//=============================================================================
-#ifndef PRODUCT
-void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
-  st->print("NOP \t# %d bytes pad for loops and calls", _count);
-}
-#endif
-
-void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
-  MacroAssembler _masm(&cbuf);
-  __ nop(_count);
-}
-
-uint MachNopNode::size(PhaseRegAlloc *) const {
-  return _count;
-}
-
 
 //=============================================================================
 #ifndef PRODUCT
@@ -1760,7 +1701,7 @@
     emit_cc(cbuf, $secondary, $cop$$cmpcode);
   %}
 
-  enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
+  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
     emit_d8(cbuf, op >> 8 );
     emit_d8(cbuf, op & 255);
@@ -1868,21 +1809,6 @@
     }
   %}
 
-  enc_class preserve_SP %{
-    debug_only(int off0 = cbuf.insts_size());
-    MacroAssembler _masm(&cbuf);
-    // RBP is preserved across all calls, even compiled calls.
-    // Use it to preserve RSP in places where the callee might change the SP.
-    __ movptr(rbp_mh_SP_save, rsp);
-    debug_only(int off1 = cbuf.insts_size());
-    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
-  %}
-
-  enc_class restore_SP %{
-    MacroAssembler _masm(&cbuf);
-    __ movptr(rsp, rbp_mh_SP_save);
-  %}
-
   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
     // who we intended to call.
@@ -1931,11 +1857,6 @@
 
   %}
 
-  enc_class Xor_Reg (eRegI dst) %{
-    emit_opcode(cbuf, 0x33);
-    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
-  %}
-
 //   Following encoding is no longer used, but may be restored if calling
 //   convention changes significantly.
 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
@@ -2013,64 +1934,6 @@
   %}
 
 
-  enc_class MovI2X_reg(regX dst, eRegI src) %{
-    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
-    emit_opcode(cbuf, 0x0F );
-    emit_opcode(cbuf, 0x6E );
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
-  %}
-
-  enc_class MovX2I_reg(eRegI dst, regX src) %{
-    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
-    emit_opcode(cbuf, 0x0F );
-    emit_opcode(cbuf, 0x7E );
-    emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
-  %}
-
-  enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
-    { // MOVD $dst,$src.lo
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x6E);
-      emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
-    }
-    { // MOVD $tmp,$src.hi
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x6E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
-    }
-    { // PUNPCKLDQ $dst,$tmp
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x62);
-      emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
-     }
-  %}
-
-  enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
-    { // MOVD $dst.lo,$src
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x7E);
-      emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
-    }
-    { // PSHUFLW $tmp,$src,0x4E  (01001110b)
-      emit_opcode(cbuf,0xF2);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x70);
-      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
-      emit_d8(cbuf, 0x4E);
-    }
-    { // MOVD $dst.hi,$tmp
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x7E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
-    }
-  %}
-
-
   // Encode a reg-reg copy.  If it is useless, then empty encoding.
   enc_class enc_Copy( eRegI dst, eRegI src ) %{
     encode_Copy( cbuf, $dst$$reg, $src$$reg );
@@ -2080,11 +1943,6 @@
     encode_Copy( cbuf, $dst$$reg, $src$$reg );
   %}
 
-  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
-    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
-  %}
-
   enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
@@ -2116,14 +1974,14 @@
     $$$emit32$src$$constant;
   %}
 
-  enc_class Con32F_as_bits(immF src) %{        // storeF_imm
+  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
     // Output Float immediate bits
     jfloat jf = $src$$constant;
     int    jf_as_bits = jint_cast( jf );
     emit_d32(cbuf, jf_as_bits);
   %}
 
-  enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
+  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
     // Output Float immediate bits
     jfloat jf = $src$$constant;
     int    jf_as_bits = jint_cast( jf );
@@ -2336,7 +2194,7 @@
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
-  enc_class enc_FP_store(memory mem, regD src) %{
+  enc_class enc_FPR_store(memory mem, regDPR src) %{
     // If src is FPR1, we can just FST to store it.
     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
     int reg_encoding = 0x2; // Just store
@@ -2485,7 +2343,7 @@
 
   // ----------------- Encodings for floating point unit -----------------
   // May leave result in FPU-TOS or FPU reg depending on opcodes
-  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
+  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
     $$$emit8$primary;
     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
   %}
@@ -2497,17 +2355,17 @@
   %}
 
   // !!!!! equivalent to Pop_Reg_F
-  enc_class Pop_Reg_D( regD dst ) %{
+  enc_class Pop_Reg_DPR( regDPR dst ) %{
     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
     emit_d8( cbuf, 0xD8+$dst$$reg );
   %}
 
-  enc_class Push_Reg_D( regD dst ) %{
+  enc_class Push_Reg_DPR( regDPR dst ) %{
     emit_opcode( cbuf, 0xD9 );
     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
   %}
 
-  enc_class strictfp_bias1( regD dst ) %{
+  enc_class strictfp_bias1( regDPR dst ) %{
     emit_opcode( cbuf, 0xDB );           // FLD m80real
     emit_opcode( cbuf, 0x2D );
     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
@@ -2515,7 +2373,7 @@
     emit_opcode( cbuf, 0xC8+$dst$$reg );
   %}
 
-  enc_class strictfp_bias2( regD dst ) %{
+  enc_class strictfp_bias2( regDPR dst ) %{
     emit_opcode( cbuf, 0xDB );           // FLD m80real
     emit_opcode( cbuf, 0x2D );
     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
@@ -2541,39 +2399,29 @@
     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
   %}
 
-  // Push the float in stackSlot 'src' onto FP-stack
-  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
-    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
-  %}
-
-  // Push the double in stackSlot 'src' onto FP-stack
-  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
-    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
-  %}
-
   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
+  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
   %}
 
   // Same as Pop_Mem_F except for opcode
   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
+  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
   %}
 
-  enc_class Pop_Reg_F( regF dst ) %{
+  enc_class Pop_Reg_FPR( regFPR dst ) %{
     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
     emit_d8( cbuf, 0xD8+$dst$$reg );
   %}
 
-  enc_class Push_Reg_F( regF dst ) %{
+  enc_class Push_Reg_FPR( regFPR dst ) %{
     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
     emit_d8( cbuf, 0xC0-1+$dst$$reg );
   %}
 
   // Push FPU's float to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
+  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
     int pop = 0x02;
     if ($src$$reg != FPR1L_enc) {
       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
@@ -2584,7 +2432,7 @@
   %}
 
   // Push FPU's double to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
+  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
     int pop = 0x02;
     if ($src$$reg != FPR1L_enc) {
       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
@@ -2595,7 +2443,7 @@
   %}
 
   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
-  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
+  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
     int pop = 0xD0 - 1; // -1 since we skip FLD
     if ($src$$reg != FPR1L_enc) {
       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
@@ -2607,16 +2455,7 @@
   %}
 
 
-  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
-    MacroAssembler masm(&cbuf);
-    masm.fld_s(  $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
-    masm.fmul(   $src2$$reg+0);   // value at TOS
-    masm.fadd(   $src$$reg+0);    // value at TOS
-    masm.fstp_d( $dst$$reg+0);    // value at TOS, popped off after store
-  %}
-
-
-  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
+  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
     // load dst in FPR0
     emit_opcode( cbuf, 0xD9 );
     emit_d8( cbuf, 0xC0-1+$dst$$reg );
@@ -2634,116 +2473,59 @@
     }
   %}
 
-  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src1
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src0
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-  %}
-
-  enc_class Push_ModX_encoding( regX src0, regX src1) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x04);
-
-    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src1
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src0
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-  %}
-
-  enc_class Push_ResultXD(regXD dst) %{
-    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
-
-    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);    // ADD ESP,8
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x08);
-  %}
-
-  enc_class Push_ResultX(regX dst, immI d8) %{
-    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
-
-    emit_opcode  (cbuf, 0xF3 );     // MOVSS dst(xmm), [ESP]
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x10 );
-    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,$d8$$constant);
-  %}
-
-  enc_class Push_SrcXD(regXD src) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
+  enc_class Push_ModD_encoding(regD src0, regD src1) %{
+    MacroAssembler _masm(&cbuf);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+  %}
+
+  enc_class Push_ModF_encoding(regF src0, regF src1) %{
+    MacroAssembler _masm(&cbuf);
+    __ subptr(rsp, 4);
+    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+  %}
+
+  enc_class Push_ResultD(regD dst) %{
+    MacroAssembler _masm(&cbuf);
+    __ fstp_d(Address(rsp, 0));
+    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 8);
+  %}
+
+  enc_class Push_ResultF(regF dst, immI d8) %{
+    MacroAssembler _masm(&cbuf);
+    __ fstp_s(Address(rsp, 0));
+    __ movflt($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, $d8$$constant);
+  %}
+
+  enc_class Push_SrcD(regD src) %{
+    MacroAssembler _masm(&cbuf);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
   %}
 
   enc_class push_stack_temp_qword() %{
-    emit_opcode(cbuf,0x83);     // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8    (cbuf,0x08);
+    MacroAssembler _masm(&cbuf);
+    __ subptr(rsp, 8);
   %}
 
   enc_class pop_stack_temp_qword() %{
-    emit_opcode(cbuf,0x83);     // ADD ESP,8
-    emit_opcode(cbuf,0xC4);
-    emit_d8    (cbuf,0x08);
-  %}
-
-  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
-    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], xmm_src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
+    MacroAssembler _masm(&cbuf);
+    __ addptr(rsp, 8);
+  %}
+
+  enc_class push_xmm_to_fpr1(regD src) %{
+    MacroAssembler _masm(&cbuf);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
   %}
 
   // Compute X^Y using Intel's fast hardware instructions, if possible.
@@ -2785,10 +2567,7 @@
     encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
   %}
 
-//   enc_class Pop_Reg_Mod_D( regD dst, regD src)
-//   was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
-
-  enc_class Push_Result_Mod_D( regD src) %{
+  enc_class Push_Result_Mod_DPR( regDPR src) %{
     if ($src$$reg != FPR1L_enc) {
       // fincstp
       emit_opcode (cbuf, 0xD9);
@@ -2817,7 +2596,7 @@
     emit_opcode( cbuf, 0x05 );
   %}
 
-  enc_class emitModD() %{
+  enc_class emitModDPR() %{
     // fprem must be iterative
     // :: loop
     // fprem
@@ -2922,24 +2701,6 @@
   %}
 
 
-  // XMM version of CmpF_Result. Because the XMM compare
-  // instructions set the EFLAGS directly. It becomes simpler than
-  // the float version above.
-  enc_class CmpX_Result(eRegI dst) %{
-    MacroAssembler _masm(&cbuf);
-    Label nan, inc, done;
-
-    __ jccb(Assembler::parity, nan);
-    __ jccb(Assembler::equal,  done);
-    __ jccb(Assembler::above,  inc);
-    __ bind(nan);
-    __ decrement(as_Register($dst$$reg)); // NO L qqq
-    __ jmpb(done);
-    __ bind(inc);
-    __ increment(as_Register($dst$$reg)); // NO L qqq
-    __ bind(done);
-  %}
-
   // Compare the longs and set flags
   // BROKEN!  Do Not use as-is
   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
@@ -3162,48 +2923,6 @@
     emit_d8    (cbuf,0 );
   %}
 
-  enc_class movq_ld(regXD dst, memory mem) %{
-    MacroAssembler _masm(&cbuf);
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-
-  enc_class movq_st(memory mem, regXD src) %{
-    MacroAssembler _masm(&cbuf);
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-
-  enc_class pshufd_8x8(regX dst, regX src) %{
-    MacroAssembler _masm(&cbuf);
-
-    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
-    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
-  %}
-
-  enc_class pshufd_4x16(regX dst, regX src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
-  %}
-
-  enc_class pshufd(regXD dst, regXD src, int mode) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
-  %}
-
-  enc_class pxor(regXD dst, regXD src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
-  %}
-
-  enc_class mov_i2x(regXD dst, eRegI src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
-  %}
-
 
   // Because the transitions from emitted code to the runtime
   // monitorenter/exit helper stubs are so slow it's critical that
@@ -3757,7 +3476,7 @@
   // 'zero', store the darned double down as an int, and reset the
   // rounding mode to 'nearest'.  The hardware throws an exception which
   // patches up the correct value directly to the stack.
-  enc_class D2I_encoding( regD src ) %{
+  enc_class DPR2I_encoding( regDPR src ) %{
     // Flip to round-to-zero mode.  We attempted to allow invalid-op
     // exceptions here, so that a NAN or other corner-case value will
     // thrown an exception (but normal values get converted at full speed).
@@ -3800,7 +3519,7 @@
     // Carry on here...
   %}
 
-  enc_class D2L_encoding( regD src ) %{
+  enc_class DPR2L_encoding( regDPR src ) %{
     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
     emit_opcode(cbuf,0x2D);
     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
@@ -3842,294 +3561,27 @@
     // Carry on here...
   %}
 
-  enc_class X2L_encoding( regX src ) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);      // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
-    emit_opcode(cbuf,0x2D);
-    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
-
-    // Encoding assumes a double has been pushed into FPR0.
-    // Store down the double as a long, popping the FPU stack
-    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
-    emit_opcode(cbuf,0x3C);
-    emit_d8(cbuf,0x24);
-
-    // Restore the rounding mode; mask the exception
-    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
-    emit_opcode(cbuf,0x2D);
-    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
-      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
-      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
-
-    // Load the converted int; adjust CPU stack
-    emit_opcode(cbuf,0x58);      // POP EAX
-
-    emit_opcode(cbuf,0x5A);      // POP EDX
-
-    emit_opcode(cbuf,0x81);      // CMP EDX,imm
-    emit_d8    (cbuf,0xFA);      // rdx
-    emit_d32   (cbuf,0x80000000);//         0x80000000
-
-    emit_opcode(cbuf,0x75);      // JNE around_slow_call
-    emit_d8    (cbuf,0x13+4);    // Size of slow_call
-
-    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
-    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
-
-    emit_opcode(cbuf,0x75);      // JNE around_slow_call
-    emit_d8    (cbuf,0x13);      // Size of slow_call
-
-    // Allocate a word
-    emit_opcode(cbuf,0x83);      // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x04);
-
-    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);      // ADD ESP,4
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x04);
-
-    // CALL directly to the runtime
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);       // Call into runtime
-    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-    // Carry on here...
-  %}
-
-  enc_class XD2L_encoding( regXD src ) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);      // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
-    emit_opcode(cbuf,0x2D);
-    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
-
-    // Encoding assumes a double has been pushed into FPR0.
-    // Store down the double as a long, popping the FPU stack
-    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
-    emit_opcode(cbuf,0x3C);
-    emit_d8(cbuf,0x24);
-
-    // Restore the rounding mode; mask the exception
-    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
-    emit_opcode(cbuf,0x2D);
-    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
-      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
-      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
-
-    // Load the converted int; adjust CPU stack
-    emit_opcode(cbuf,0x58);      // POP EAX
-
-    emit_opcode(cbuf,0x5A);      // POP EDX
-
-    emit_opcode(cbuf,0x81);      // CMP EDX,imm
-    emit_d8    (cbuf,0xFA);      // rdx
-    emit_d32   (cbuf,0x80000000); //         0x80000000
-
-    emit_opcode(cbuf,0x75);      // JNE around_slow_call
-    emit_d8    (cbuf,0x13+4);    // Size of slow_call
-
-    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
-    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
-
-    emit_opcode(cbuf,0x75);      // JNE around_slow_call
-    emit_d8    (cbuf,0x13);      // Size of slow_call
-
-    // Push src onto stack slow-path
-    // Allocate a word
-    emit_opcode(cbuf,0x83);      // SUB ESP,8
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x08);
-
-    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);      // ADD ESP,8
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x08);
-
-    // CALL directly to the runtime
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);      // Call into runtime
-    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-    // Carry on here...
-  %}
-
-  enc_class D2X_encoding( regX dst, regD src ) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);            // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x04);
-    int pop = 0x02;
-    if ($src$$reg != FPR1L_enc) {
-      emit_opcode( cbuf, 0xD9 );       // FLD    ST(i-1)
-      emit_d8( cbuf, 0xC0-1+$src$$reg );
-      pop = 0x03;
-    }
-    store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S  [ESP]
-
-    emit_opcode  (cbuf, 0xF3 );        // MOVSS dst(xmm), [ESP]
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x10 );
-    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);            // ADD ESP,4
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x04);
-    // Carry on here...
-  %}
-
-  enc_class FX2I_encoding( regX src, eRegI dst ) %{
-    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
-
-    // Compare the result to see if we need to go to the slow path
-    emit_opcode(cbuf,0x81);       // CMP dst,imm
-    emit_rm    (cbuf,0x3,0x7,$dst$$reg);
-    emit_d32   (cbuf,0x80000000); //         0x80000000
-
-    emit_opcode(cbuf,0x75);       // JNE around_slow_call
-    emit_d8    (cbuf,0x13);       // Size of slow_call
-    // Store xmm to a temp memory
-    // location and push it onto stack.
-
-    emit_opcode(cbuf,0x83);  // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf, $primary ? 0x8 : 0x4);
-
-    emit_opcode  (cbuf, $primary ? 0xF2 : 0xF3 );   // MOVSS [ESP], xmm
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf, $primary ? 0xDD : 0xD9 );      // FLD [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);    // ADD ESP,4
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf, $primary ? 0x8 : 0x4);
-
-    // CALL directly to the runtime
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf,0xE8);       // Call into runtime
-    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-
-    // Carry on here...
-  %}
-
-  enc_class X2D_encoding( regD dst, regX src ) %{
-    // Allocate a word
-    emit_opcode(cbuf,0x83);     // SUB ESP,4
-    emit_opcode(cbuf,0xEC);
-    emit_d8(cbuf,0x04);
-
-    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], xmm
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, 0x11 );
-    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0xD9 );    // FLD_S [ESP]
-    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
-    emit_opcode(cbuf,0x83);     // ADD ESP,4
-    emit_opcode(cbuf,0xC4);
-    emit_d8(cbuf,0x04);
-
-    // Carry on here...
-  %}
-
-  enc_class AbsXF_encoding(regX dst) %{
-    address signmask_address=(address)float_signmask_pool;
-    // andpd:\tANDPS  $dst,[signconst]
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
-    emit_d32(cbuf, (int)signmask_address);
-  %}
-
-  enc_class AbsXD_encoding(regXD dst) %{
-    address signmask_address=(address)double_signmask_pool;
-    // andpd:\tANDPD  $dst,[signconst]
-    emit_opcode(cbuf, 0x66);
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
-    emit_d32(cbuf, (int)signmask_address);
-  %}
-
-  enc_class NegXF_encoding(regX dst) %{
-    address signmask_address=(address)float_signflip_pool;
-    // andpd:\tXORPS  $dst,[signconst]
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
-    emit_d32(cbuf, (int)signmask_address);
-  %}
-
-  enc_class NegXD_encoding(regXD dst) %{
-    address signmask_address=(address)double_signflip_pool;
-    // andpd:\tXORPD  $dst,[signconst]
-    emit_opcode(cbuf, 0x66);
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
-    emit_d32(cbuf, (int)signmask_address);
-  %}
-
-  enc_class FMul_ST_reg( eRegF src1 ) %{
+  enc_class FMul_ST_reg( eRegFPR src1 ) %{
     // Operand was loaded from memory into fp ST (stack top)
     // FMUL   ST,$src  /* D8 C8+i */
     emit_opcode(cbuf, 0xD8);
     emit_opcode(cbuf, 0xC8 + $src1$$reg);
   %}
 
-  enc_class FAdd_ST_reg( eRegF src2 ) %{
+  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
     // FADDP  ST,src2  /* D8 C0+i */
     emit_opcode(cbuf, 0xD8);
     emit_opcode(cbuf, 0xC0 + $src2$$reg);
     //could use FADDP  src2,fpST  /* DE C0+i */
   %}
 
-  enc_class FAddP_reg_ST( eRegF src2 ) %{
+  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
     // FADDP  src2,ST  /* DE C0+i */
     emit_opcode(cbuf, 0xDE);
     emit_opcode(cbuf, 0xC0 + $src2$$reg);
   %}
 
-  enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
+  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
     // Operand has been loaded into fp ST (stack top)
       // FSUB   ST,$src1
       emit_opcode(cbuf, 0xD8);
@@ -4140,7 +3592,7 @@
       emit_opcode(cbuf, 0xF0 + $src2$$reg);
   %}
 
-  enc_class MulFAddF (eRegF src1, eRegF src2) %{
+  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
     // Operand was loaded from memory into fp ST (stack top)
     // FADD   ST,$src  /* D8 C0+i */
     emit_opcode(cbuf, 0xD8);
@@ -4152,7 +3604,7 @@
   %}
 
 
-  enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
+  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
     // Operand was loaded from memory into fp ST (stack top)
     // FADD   ST,$src  /* D8 C0+i */
     emit_opcode(cbuf, 0xD8);
@@ -4176,66 +3628,6 @@
     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
   %}
 
-  enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
-    { // Atomic long load
-      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-      int base     = $mem$$base;
-      int index    = $mem$$index;
-      int scale    = $mem$$scale;
-      int displace = $mem$$disp;
-      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-    { // MOVSD $dst,$tmp ! atomic long store
-      emit_opcode(cbuf,0xF2);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x11);
-      int base     = $dst$$base;
-      int index    = $dst$$index;
-      int scale    = $dst$$scale;
-      int displace = $dst$$disp;
-      bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-  %}
-
-  enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
-    { // Atomic long load
-      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-      int base     = $mem$$base;
-      int index    = $mem$$index;
-      int scale    = $mem$$scale;
-      int displace = $mem$$disp;
-      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-    { // MOVD $dst.lo,$tmp
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x7E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
-    }
-    { // PSRLQ $tmp,32
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x73);
-      emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
-      emit_d8(cbuf, 0x20);
-    }
-    { // MOVD $dst.hi,$tmp
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x7E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
-    }
-  %}
-
   // Volatile Store Long.  Must be atomic, so move it into
   // the FP TOS and then do a 64-bit FIST.  Has to probe the
   // target address before the store (for null-ptr checks)
@@ -4253,66 +3645,6 @@
     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
   %}
 
-  enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
-    { // Atomic long load
-      // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-      int base     = $src$$base;
-      int index    = $src$$index;
-      int scale    = $src$$scale;
-      int displace = $src$$disp;
-      bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
-    { // MOVSD $mem,$tmp ! atomic long store
-      emit_opcode(cbuf,0xF2);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x11);
-      int base     = $mem$$base;
-      int index    = $mem$$index;
-      int scale    = $mem$$scale;
-      int displace = $mem$$disp;
-      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-  %}
-
-  enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
-    { // MOVD $tmp,$src.lo
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x6E);
-      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
-    }
-    { // MOVD $tmp2,$src.hi
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x6E);
-      emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
-    }
-    { // PUNPCKLDQ $tmp,$tmp2
-      emit_opcode(cbuf,0x66);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x62);
-      emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
-    }
-    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
-    { // MOVSD $mem,$tmp ! atomic long store
-      emit_opcode(cbuf,0xF2);
-      emit_opcode(cbuf,0x0F);
-      emit_opcode(cbuf,0x11);
-      int base     = $mem$$base;
-      int index    = $mem$$index;
-      int scale    = $mem$$scale;
-      int displace = $mem$$disp;
-      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
-      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
-    }
-  %}
-
   // Safepoint Poll.  This polls the safepoint page, and causes an
   // exception if it is not readable. Unfortunately, it kills the condition code
   // in the process
@@ -4425,9 +3757,9 @@
   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
   // Otherwise, it is above the locks and verification slot and alignment word
   return_addr(STACK - 1 +
-              round_to(1+VerifyStackAtCalls+
-              Compile::current()->fixed_slots(),
-              (StackAlignmentInBytes/wordSize)));
+              round_to((Compile::current()->in_preserve_stack_slots() +
+                        Compile::current()->fixed_slots()),
+                       stack_alignment_in_slots()));
 
   // Body of function which returns an integer array locating
   // arguments either in registers or in stack slots.  Passed an array
@@ -4705,7 +4037,7 @@
 %}
 
 //Double Immediate zero
-operand immD0() %{
+operand immDPR0() %{
   // Do additional (and counter-intuitive) test against NaN to work around VC++
   // bug that generates code such that NaNs compare equal to 0.0
   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
@@ -4717,7 +4049,7 @@
 %}
 
 // Double Immediate one
-operand immD1() %{
+operand immDPR1() %{
   predicate( UseSSE<=1 && n->getd() == 1.0 );
   match(ConD);
 
@@ -4727,7 +4059,7 @@
 %}
 
 // Double Immediate
-operand immD() %{
+operand immDPR() %{
   predicate(UseSSE<=1);
   match(ConD);
 
@@ -4736,7 +4068,7 @@
   interface(CONST_INTER);
 %}
 
-operand immXD() %{
+operand immD() %{
   predicate(UseSSE>=2);
   match(ConD);
 
@@ -4746,7 +4078,7 @@
 %}
 
 // Double Immediate zero
-operand immXD0() %{
+operand immD0() %{
   // Do additional (and counter-intuitive) test against NaN to work around VC++
   // bug that generates code such that NaNs compare equal to 0.0 AND do not
   // compare equal to -0.0.
@@ -4758,7 +4090,7 @@
 %}
 
 // Float Immediate zero
-operand immF0() %{
+operand immFPR0() %{
   predicate(UseSSE == 0 && n->getf() == 0.0F);
   match(ConF);
 
@@ -4768,7 +4100,7 @@
 %}
 
 // Float Immediate one
-operand immF1() %{
+operand immFPR1() %{
   predicate(UseSSE == 0 && n->getf() == 1.0F);
   match(ConF);
 
@@ -4778,17 +4110,17 @@
 %}
 
 // Float Immediate
+operand immFPR() %{
+  predicate( UseSSE == 0 );
+  match(ConF);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate
 operand immF() %{
-  predicate( UseSSE == 0 );
-  match(ConF);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Float Immediate
-operand immXF() %{
   predicate(UseSSE >= 1);
   match(ConF);
 
@@ -4798,7 +4130,7 @@
 %}
 
 // Float Immediate zero.  Zero and not -0.0
-operand immXF0() %{
+operand immF0() %{
   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
   match(ConF);
 
@@ -5174,7 +4506,7 @@
 %}
 
 // Float register operands
-operand regD() %{
+operand regDPR() %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(dbl_reg));
   match(RegD);
@@ -5184,7 +4516,7 @@
   interface(REG_INTER);
 %}
 
-operand regDPR1(regD reg) %{
+operand regDPR1(regDPR reg) %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(dbl_reg0));
   match(reg);
@@ -5192,7 +4524,7 @@
   interface(REG_INTER);
 %}
 
-operand regDPR2(regD reg) %{
+operand regDPR2(regDPR reg) %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(dbl_reg1));
   match(reg);
@@ -5200,7 +4532,7 @@
   interface(REG_INTER);
 %}
 
-operand regnotDPR1(regD reg) %{
+operand regnotDPR1(regDPR reg) %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(dbl_notreg0));
   match(reg);
@@ -5209,18 +4541,18 @@
 %}
 
 // XMM Double register operands
-operand regXD() %{
+operand regD() %{
   predicate( UseSSE>=2 );
   constraint(ALLOC_IN_RC(xdb_reg));
   match(RegD);
-  match(regXD6);
-  match(regXD7);
+  match(regD6);
+  match(regD7);
   format %{ %}
   interface(REG_INTER);
 %}
 
 // XMM6 double register operands
-operand regXD6(regXD reg) %{
+operand regD6(regD reg) %{
   predicate( UseSSE>=2 );
   constraint(ALLOC_IN_RC(xdb_reg6));
   match(reg);
@@ -5229,7 +4561,7 @@
 %}
 
 // XMM7 double register operands
-operand regXD7(regXD reg) %{
+operand regD7(regD reg) %{
   predicate( UseSSE>=2 );
   constraint(ALLOC_IN_RC(xdb_reg7));
   match(reg);
@@ -5238,7 +4570,7 @@
 %}
 
 // Float register operands
-operand regF() %{
+operand regFPR() %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(flt_reg));
   match(RegF);
@@ -5248,7 +4580,7 @@
 %}
 
 // Float register operands
-operand regFPR1(regF reg) %{
+operand regFPR1(regFPR reg) %{
   predicate( UseSSE < 2 );
   constraint(ALLOC_IN_RC(flt_reg0));
   match(reg);
@@ -5257,7 +4589,7 @@
 %}
 
 // XMM register operands
-operand regX() %{
+operand regF() %{
   predicate( UseSSE>=1 );
   constraint(ALLOC_IN_RC(xmm_reg));
   match(RegF);
@@ -6001,7 +5333,7 @@
 %}
 
 // Conditional move double reg-reg
-pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
+pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -6010,7 +5342,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg(regD dst) %{
+pipe_class fpu_reg(regDPR dst) %{
     instruction_count(2);
     dst    : S3(read);
     DECODE : S0(2);     // any 2 decoders
@@ -6018,7 +5350,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg_reg(regD dst, regD src) %{
+pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
     instruction_count(2);
     dst    : S4(write);
     src    : S3(read);
@@ -6027,7 +5359,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
+pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
     instruction_count(3);
     dst    : S4(write);
     src1   : S3(read);
@@ -6037,7 +5369,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
     instruction_count(4);
     dst    : S4(write);
     src1   : S3(read);
@@ -6048,7 +5380,7 @@
 %}
 
 // Float reg-reg operation
-pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
+pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
     instruction_count(4);
     dst    : S4(write);
     src1   : S3(read);
@@ -6061,7 +5393,7 @@
 %}
 
 // Float reg-mem operation
-pipe_class fpu_reg_mem(regD dst, memory mem) %{
+pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
     instruction_count(2);
     dst    : S5(write);
     mem    : S3(read);
@@ -6072,7 +5404,7 @@
 %}
 
 // Float reg-mem operation
-pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
+pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
     instruction_count(3);
     dst    : S5(write);
     src1   : S3(read);
@@ -6084,7 +5416,7 @@
 %}
 
 // Float mem-reg operation
-pipe_class fpu_mem_reg(memory mem, regD src) %{
+pipe_class fpu_mem_reg(memory mem, regDPR src) %{
     instruction_count(2);
     src    : S5(read);
     mem    : S3(read);
@@ -6094,7 +5426,7 @@
     MEM    : S3;        // any mem
 %}
 
-pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
+pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
     instruction_count(3);
     src1   : S3(read);
     src2   : S3(read);
@@ -6105,7 +5437,7 @@
     MEM    : S3;        // any mem
 %}
 
-pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
+pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
     instruction_count(3);
     src1   : S3(read);
     src2   : S3(read);
@@ -6134,7 +5466,7 @@
     MEM    : S3(3);     // any mem
 %}
 
-pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
+pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
     instruction_count(3);
     src1   : S4(read);
     mem    : S4(read);
@@ -6145,7 +5477,7 @@
 %}
 
 // Float load constant
-pipe_class fpu_reg_con(regD dst) %{
+pipe_class fpu_reg_con(regDPR dst) %{
     instruction_count(2);
     dst    : S5(write);
     D0     : S0;        // big decoder only for the load
@@ -6155,7 +5487,7 @@
 %}
 
 // Float load constant
-pipe_class fpu_reg_reg_con(regD dst, regD src) %{
+pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
     instruction_count(3);
     dst    : S5(write);
     src    : S3(read);
@@ -6870,18 +6202,21 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
+instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
   match(Set dst (LoadL mem));
   effect(TEMP tmp);
   ins_cost(180);
   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
             "MOVSD  $dst,$tmp" %}
-  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
-  ins_pipe( pipe_slow );
-%}
-
-instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
+  ins_encode %{
+    __ movdbl($tmp$$XMMRegister, $mem$$Address);
+    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
   match(Set dst (LoadL mem));
   effect(TEMP tmp);
@@ -6890,7 +6225,12 @@
             "MOVD   $dst.lo,$tmp\n\t"
             "PSRLQ  $tmp,32\n\t"
             "MOVD   $dst.hi,$tmp" %}
-  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
+  ins_encode %{
+    __ movdbl($tmp$$XMMRegister, $mem$$Address);
+    __ movdl($dst$$Register, $tmp$$XMMRegister);
+    __ psrlq($tmp$$XMMRegister, 32);
+    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6929,7 +6269,7 @@
 %}
 
 // Load Double
-instruct loadD(regD dst, memory mem) %{
+instruct loadDPR(regDPR dst, memory mem) %{
   predicate(UseSSE<=1);
   match(Set dst (LoadD mem));
 
@@ -6938,42 +6278,48 @@
             "FSTP   $dst" %}
   opcode(0xDD);               /* DD /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_D(dst) );
+              Pop_Reg_DPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
 // Load Double to XMM
-instruct loadXD(regXD dst, memory mem) %{
+instruct loadD(regD dst, memory mem) %{
   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
   match(Set dst (LoadD mem));
   ins_cost(145);
   format %{ "MOVSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-instruct loadXD_partial(regXD dst, memory mem) %{
+  ins_encode %{
+    __ movdbl ($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct loadD_partial(regD dst, memory mem) %{
   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
   match(Set dst (LoadD mem));
   ins_cost(145);
   format %{ "MOVLPD $dst,$mem" %}
-  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
+  ins_encode %{
+    __ movdbl ($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load to XMM register (single-precision floating point)
 // MOVSS instruction
-instruct loadX(regX dst, memory mem) %{
+instruct loadF(regF dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (LoadF mem));
   ins_cost(145);
   format %{ "MOVSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
+  ins_encode %{
+    __ movflt ($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Float
-instruct loadF(regF dst, memory mem) %{
+instruct loadFPR(regFPR dst, memory mem) %{
   predicate(UseSSE==0);
   match(Set dst (LoadF mem));
 
@@ -6982,57 +6328,67 @@
             "FSTP   $dst" %}
   opcode(0xD9);               /* D9 /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_F(dst) );
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
 // Load Aligned Packed Byte to XMM register
-instruct loadA8B(regXD dst, memory mem) %{
+instruct loadA8B(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load8B mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Short to XMM register
-instruct loadA4S(regXD dst, memory mem) %{
+instruct loadA4S(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load4S mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Char to XMM register
-instruct loadA4C(regXD dst, memory mem) %{
+instruct loadA4C(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load4C mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Integer to XMM register
-instruct load2IU(regXD dst, memory mem) %{
+instruct load2IU(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load2I mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Single to XMM
-instruct loadA2F(regXD dst, memory mem) %{
+instruct loadA2F(regD dst, memory mem) %{
   predicate(UseSSE>=1);
   match(Set dst (Load2F mem));
   ins_cost(145);
   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7139,8 +6495,8 @@
   ins_pipe( ialu_reg_long );
 %}
 
-// The instruction usage is guarded by predicate in operand immF().
-instruct loadConF(regF dst, immF con) %{
+// The instruction usage is guarded by predicate in operand immFPR().
+instruct loadConFPR(regFPR dst, immFPR con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
@@ -7152,8 +6508,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immF0().
-instruct loadConF0(regF dst, immF0 con) %{
+// The instruction usage is guarded by predicate in operand immFPR0().
+instruct loadConFPR0(regFPR dst, immFPR0 con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "FLDZ   ST\n\t"
@@ -7165,8 +6521,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immF1().
-instruct loadConF1(regF dst, immF1 con) %{
+// The instruction usage is guarded by predicate in operand immFPR1().
+instruct loadConFPR1(regFPR dst, immFPR1 con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "FLD1   ST\n\t"
@@ -7178,8 +6534,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immXF().
-instruct loadConX(regX dst, immXF con) %{
+// The instruction usage is guarded by predicate in operand immF().
+instruct loadConF(regF dst, immF con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
@@ -7189,8 +6545,8 @@
   ins_pipe(pipe_slow);
 %}
 
-// The instruction usage is guarded by predicate in operand immXF0().
-instruct loadConX0(regX dst, immXF0 src) %{
+// The instruction usage is guarded by predicate in operand immF0().
+instruct loadConF0(regF dst, immF0 src) %{
   match(Set dst src);
   ins_cost(100);
   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
@@ -7200,8 +6556,8 @@
   ins_pipe(pipe_slow);
 %}
 
-// The instruction usage is guarded by predicate in operand immD().
-instruct loadConD(regD dst, immD con) %{
+// The instruction usage is guarded by predicate in operand immDPR().
+instruct loadConDPR(regDPR dst, immDPR con) %{
   match(Set dst con);
   ins_cost(125);
 
@@ -7214,8 +6570,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immD0().
-instruct loadConD0(regD dst, immD0 con) %{
+// The instruction usage is guarded by predicate in operand immDPR0().
+instruct loadConDPR0(regDPR dst, immDPR0 con) %{
   match(Set dst con);
   ins_cost(125);
 
@@ -7228,8 +6584,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immD1().
-instruct loadConD1(regD dst, immD1 con) %{
+// The instruction usage is guarded by predicate in operand immDPR1().
+instruct loadConDPR1(regDPR dst, immDPR1 con) %{
   match(Set dst con);
   ins_cost(125);
 
@@ -7242,8 +6598,8 @@
   ins_pipe(fpu_reg_con);
 %}
 
-// The instruction usage is guarded by predicate in operand immXD().
-instruct loadConXD(regXD dst, immXD con) %{
+// The instruction usage is guarded by predicate in operand immD().
+instruct loadConD(regD dst, immD con) %{
   match(Set dst con);
   ins_cost(125);
   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
@@ -7253,12 +6609,14 @@
   ins_pipe(pipe_slow);
 %}
 
-// The instruction usage is guarded by predicate in operand immXD0().
-instruct loadConXD0(regXD dst, immXD0 src) %{
+// The instruction usage is guarded by predicate in operand immD0().
+instruct loadConD0(regD dst, immD0 src) %{
   match(Set dst src);
   ins_cost(100);
   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
-  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
+  ins_encode %{
+    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7296,7 +6654,7 @@
 %}
 
 // Load Stack Slot
-instruct loadSSF(regF dst, stackSlotF src) %{
+instruct loadSSF(regFPR dst, stackSlotF src) %{
   match(Set dst src);
   ins_cost(125);
 
@@ -7304,12 +6662,12 @@
             "FSTP   $dst" %}
   opcode(0xD9);               /* D9 /0, FLD m32real */
   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_F(dst) );
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
 // Load Stack Slot
-instruct loadSSD(regD dst, stackSlotD src) %{
+instruct loadSSD(regDPR dst, stackSlotD src) %{
   match(Set dst src);
   ins_cost(125);
 
@@ -7317,7 +6675,7 @@
             "FSTP   $dst" %}
   opcode(0xDD);               /* DD /0, FLD m64real */
   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_D(dst) );
+              Pop_Reg_DPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
@@ -7552,7 +6910,7 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
+instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
   match(Set mem (StoreL mem src));
   effect( TEMP tmp, KILL cr );
@@ -7560,12 +6918,15 @@
   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
             "MOVSD  $tmp,$src\n\t"
             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
-  opcode(0x3B);
-  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
-  ins_pipe( pipe_slow );
-%}
-
-instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
+  ins_encode %{
+    __ cmpl(rax, $mem$$Address);
+    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
+    __ movdbl($mem$$Address, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
   match(Set mem (StoreL mem src));
   effect( TEMP tmp2 , TEMP tmp, KILL cr );
@@ -7575,8 +6936,13 @@
             "MOVD   $tmp2,$src.hi\n\t"
             "PUNPCKLDQ $tmp,$tmp2\n\t"
             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
-  opcode(0x3B);
-  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
+  ins_encode %{
+    __ cmpl(rax, $mem$$Address);
+    __ movdl($tmp$$XMMRegister, $src$$Register);
+    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
+    __ movdbl($mem$$Address, $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7638,32 +7004,38 @@
 %}
 
 // Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regXD src) %{
+instruct storeA8B(memory mem, regD src) %{
   predicate(UseSSE>=1);
   match(Set mem (Store8B mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed8B" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regXD src) %{
+instruct storeA4C(memory mem, regD src) %{
   predicate(UseSSE>=1);
   match(Set mem (Store4C mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed4C" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regXD src) %{
+instruct storeA2I(memory mem, regD src) %{
   predicate(UseSSE>=1);
   match(Set mem (Store2I mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2I" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7679,98 +7051,116 @@
 %}
 
 // Store Double
-instruct storeD( memory mem, regDPR1 src) %{
+instruct storeDPR( memory mem, regDPR1 src) %{
   predicate(UseSSE<=1);
   match(Set mem (StoreD mem src));
 
   ins_cost(100);
   format %{ "FST_D  $mem,$src" %}
   opcode(0xDD);       /* DD /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store double does rounding on x86
-instruct storeD_rounded( memory mem, regDPR1 src) %{
+instruct storeDPR_rounded( memory mem, regDPR1 src) %{
   predicate(UseSSE<=1);
   match(Set mem (StoreD mem (RoundDouble src)));
 
   ins_cost(100);
   format %{ "FST_D  $mem,$src\t# round" %}
   opcode(0xDD);       /* DD /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store XMM register to memory (double-precision floating points)
 // MOVSD instruction
-instruct storeXD(memory mem, regXD src) %{
+instruct storeD(memory mem, regD src) %{
   predicate(UseSSE>=2);
   match(Set mem (StoreD mem src));
   ins_cost(95);
   format %{ "MOVSD  $mem,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+  ins_encode %{
+    __ movdbl($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store XMM register to memory (single-precision floating point)
 // MOVSS instruction
-instruct storeX(memory mem, regX src) %{
+instruct storeF(memory mem, regF src) %{
   predicate(UseSSE>=1);
   match(Set mem (StoreF mem src));
   ins_cost(95);
   format %{ "MOVSS  $mem,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+  ins_encode %{
+    __ movflt($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regXD src) %{
+instruct storeA2F(memory mem, regD src) %{
   predicate(UseSSE>=1);
   match(Set mem (Store2F mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2F" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Store Float
-instruct storeF( memory mem, regFPR1 src) %{
+instruct storeFPR( memory mem, regFPR1 src) %{
   predicate(UseSSE==0);
   match(Set mem (StoreF mem src));
 
   ins_cost(100);
   format %{ "FST_S  $mem,$src" %}
   opcode(0xD9);       /* D9 /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store Float does rounding on x86
-instruct storeF_rounded( memory mem, regFPR1 src) %{
+instruct storeFPR_rounded( memory mem, regFPR1 src) %{
   predicate(UseSSE==0);
   match(Set mem (StoreF mem (RoundFloat src)));
 
   ins_cost(100);
   format %{ "FST_S  $mem,$src\t# round" %}
   opcode(0xD9);       /* D9 /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store Float does rounding on x86
-instruct storeF_Drounded( memory mem, regDPR1 src) %{
+instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
   predicate(UseSSE<=1);
   match(Set mem (StoreF mem (ConvD2F src)));
 
   ins_cost(100);
   format %{ "FST_S  $mem,$src\t# D-round" %}
   opcode(0xD9);       /* D9 /2 */
-  ins_encode( enc_FP_store(mem,src) );
+  ins_encode( enc_FPR_store(mem,src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Store immediate Float value (it is faster than store from FPU register)
+// The instruction usage is guarded by predicate in operand immFPR().
+instruct storeFPR_imm( memory mem, immFPR src) %{
+  match(Set mem (StoreF mem src));
+
+  ins_cost(50);
+  format %{ "MOV    $mem,$src\t# store float" %}
+  opcode(0xC7);               /* C7 /0 */
+  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
+  ins_pipe( ialu_mem_imm );
+%}
+
+// Store immediate Float value (it is faster than store from XMM register)
 // The instruction usage is guarded by predicate in operand immF().
 instruct storeF_imm( memory mem, immF src) %{
   match(Set mem (StoreF mem src));
@@ -7782,18 +7172,6 @@
   ins_pipe( ialu_mem_imm );
 %}
 
-// Store immediate Float value (it is faster than store from XMM register)
-// The instruction usage is guarded by predicate in operand immXF().
-instruct storeX_imm( memory mem, immXF src) %{
-  match(Set mem (StoreF mem src));
-
-  ins_cost(50);
-  format %{ "MOV    $mem,$src\t# store float" %}
-  opcode(0xC7);               /* C7 /0 */
-  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32XF_as_bits( src ));
-  ins_pipe( ialu_mem_imm );
-%}
-
 // Store Integer to stack slot
 instruct storeSSI(stackSlotI dst, eRegI src) %{
   match(Set dst src);
@@ -7901,6 +7279,16 @@
   ins_pipe(empty);
 %}
 
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(0);
+
+  size(0);
+  format %{ "MEMBAR-storestore (empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
 //----------Move Instructions--------------------------------------------------
 instruct castX2P(eAXRegP dst, eAXRegI src) %{
   match(Set dst (CastX2P src));
@@ -8088,29 +7476,29 @@
 //%}
 
 // Conditional move
-instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
+instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
   format %{ "FCMOV$cop $dst,$src\t# double" %}
   opcode(0xDA);
-  ins_encode( enc_cmov_d(cop,src) );
-  ins_pipe( pipe_cmovD_reg );
+  ins_encode( enc_cmov_dpr(cop,src) );
+  ins_pipe( pipe_cmovDPR_reg );
 %}
 
 // Conditional move
-instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
+instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
   format %{ "FCMOV$cop $dst,$src\t# float" %}
   opcode(0xDA);
-  ins_encode( enc_cmov_d(cop,src) );
-  ins_pipe( pipe_cmovD_reg );
+  ins_encode( enc_cmov_dpr(cop,src) );
+  ins_pipe( pipe_cmovDPR_reg );
 %}
 
 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
+instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8118,12 +7506,12 @@
             "MOV    $dst,$src\t# double\n"
       "skip:" %}
   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
-  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
-  ins_pipe( pipe_cmovD_reg );
+  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
+  ins_pipe( pipe_cmovDPR_reg );
 %}
 
 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
+instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8131,12 +7519,12 @@
             "MOV    $dst,$src\t# float\n"
       "skip:" %}
   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
-  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
-  ins_pipe( pipe_cmovD_reg );
+  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
+  ins_pipe( pipe_cmovDPR_reg );
 %}
 
 // No CMOVE with SSE/SSE2
-instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
+instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
   predicate (UseSSE>=1);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8154,7 +7542,7 @@
 %}
 
 // No CMOVE with SSE/SSE2
-instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
+instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
   predicate (UseSSE>=2);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8172,7 +7560,7 @@
 %}
 
 // unsigned version
-instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
+instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
   predicate (UseSSE>=1);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8189,17 +7577,17 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
+instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
   predicate (UseSSE>=1);
   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovX_regU(cop, cr, dst, src);
+    fcmovF_regU(cop, cr, dst, src);
   %}
 %}
 
 // unsigned version
-instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
+instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
   predicate (UseSSE>=2);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -8216,12 +7604,12 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
+instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
   predicate (UseSSE>=2);
   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovXD_regU(cop, cr, dst, src);
+    fcmovD_regU(cop, cr, dst, src);
   %}
 %}
 
@@ -8440,7 +7828,7 @@
 %}
 
 // LoadLong-locked - same as a volatile long load when used with compare-swap
-instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
+instruct loadLLocked(stackSlotL dst, memory mem) %{
   predicate(UseSSE<=1);
   match(Set dst (LoadLLocked mem));
 
@@ -8451,18 +7839,21 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
+instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
   predicate(UseSSE>=2);
   match(Set dst (LoadLLocked mem));
   effect(TEMP tmp);
   ins_cost(180);
   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
             "MOVSD  $dst,$tmp" %}
-  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
-  ins_pipe( pipe_slow );
-%}
-
-instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
+  ins_encode %{
+    __ movdbl($tmp$$XMMRegister, $mem$$Address);
+    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
   predicate(UseSSE>=2);
   match(Set dst (LoadLLocked mem));
   effect(TEMP tmp);
@@ -8471,7 +7862,12 @@
             "MOVD   $dst.lo,$tmp\n\t"
             "PSRLQ  $tmp,32\n\t"
             "MOVD   $dst.hi,$tmp" %}
-  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
+  ins_encode %{
+    __ movdbl($tmp$$XMMRegister, $mem$$Address);
+    __ movdl($dst$$Register, $tmp$$XMMRegister);
+    __ psrlq($tmp$$XMMRegister, 32);
+    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -10054,7 +9450,7 @@
 // Compare & branch
 
 // P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
+instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
   predicate(VM_Version::supports_cmov() && UseSSE <=1);
   match(Set cr (CmpD src1 src2));
   effect(KILL rax);
@@ -10066,26 +9462,26 @@
             "SAHF\n"
      "exit:\tNOP               // avoid branch to branch" %}
   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               cmpF_P6_fixup );
   ins_pipe( pipe_slow );
 %}
 
-instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
+instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
   predicate(VM_Version::supports_cmov() && UseSSE <=1);
   match(Set cr (CmpD src1 src2));
   ins_cost(150);
   format %{ "FLD    $src1\n\t"
             "FUCOMIP ST,$src2  // P6 instruction" %}
   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2));
   ins_pipe( pipe_slow );
 %}
 
 // Compare & branch
-instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
+instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
   predicate(UseSSE<=1);
   match(Set cr (CmpD src1 src2));
   effect(KILL rax);
@@ -10098,138 +9494,140 @@
             "MOV    AH,1\t# unordered treat as LT\n"
     "flags:\tSAHF" %}
   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               fpu_flags);
   ins_pipe( pipe_slow );
 %}
 
 // Compare vs zero into -1,0,1
-instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (CmpD3 src1 zero));
   effect(KILL cr, KILL rax);
   ins_cost(280);
   format %{ "FTSTD  $dst,$src1" %}
   opcode(0xE4, 0xD9);
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcS, OpcP, PopFPU,
               CmpF_Result(dst));
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1
-instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (CmpD3 src1 src2));
   effect(KILL cr, KILL rax);
   ins_cost(300);
   format %{ "FCMPD  $dst,$src1,$src2" %}
   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               CmpF_Result(dst));
   ins_pipe( pipe_slow );
 %}
 
 // float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
+instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
   predicate(UseSSE>=2);
-  match(Set cr (CmpD dst src));
-  effect(KILL rax);
-  ins_cost(125);
-  format %{ "COMISD $dst,$src\n"
-          "\tJNP    exit\n"
-          "\tMOV    ah,1       // saw a NaN, set CF\n"
-          "\tSAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
+  match(Set cr (CmpD src1 src2));
+  ins_cost(145);
+  format %{ "UCOMISD $src1,$src2\n\t"
+            "JNP,s   exit\n\t"
+            "PUSHF\t# saw NaN, set CF\n\t"
+            "AND     [rsp], #0xffffff2b\n\t"
+            "POPF\n"
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
   predicate(UseSSE>=2);
-  match(Set cr (CmpD dst src));
+  match(Set cr (CmpD src1 src2));
   ins_cost(100);
-  format %{ "COMISD $dst,$src" %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+  format %{ "UCOMISD $src1,$src2" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
+instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
   predicate(UseSSE>=2);
-  match(Set cr (CmpD dst (LoadD src)));
-  effect(KILL rax);
+  match(Set cr (CmpD src1 (LoadD src2)));
   ins_cost(145);
-  format %{ "COMISD $dst,$src\n"
-          "\tJNP    exit\n"
-          "\tMOV    ah,1       // saw a NaN, set CF\n"
-          "\tSAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
+  format %{ "UCOMISD $src1,$src2\n\t"
+            "JNP,s   exit\n\t"
+            "PUSHF\t# saw NaN, set CF\n\t"
+            "AND     [rsp], #0xffffff2b\n\t"
+            "POPF\n"
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
   predicate(UseSSE>=2);
-  match(Set cr (CmpD dst (LoadD src)));
+  match(Set cr (CmpD src1 (LoadD src2)));
   ins_cost(100);
-  format %{ "COMISD $dst,$src" %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
+  format %{ "UCOMISD $src1,$src2" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1 in XMM
-instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
+instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
   predicate(UseSSE>=2);
   match(Set dst (CmpD3 src1 src2));
   effect(KILL cr);
   ins_cost(255);
-  format %{ "XOR    $dst,$dst\n"
-          "\tCOMISD $src1,$src2\n"
-          "\tJP,s   nan\n"
-          "\tJEQ,s  exit\n"
-          "\tJA,s   inc\n"
-      "nan:\tDEC    $dst\n"
-          "\tJMP,s  exit\n"
-      "inc:\tINC    $dst\n"
-      "exit:"
-                %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
-             CmpX_Result(dst));
+  format %{ "UCOMISD $src1, $src2\n\t"
+            "MOV     $dst, #-1\n\t"
+            "JP,s    done\n\t"
+            "JB,s    done\n\t"
+            "SETNE   $dst\n\t"
+            "MOVZB   $dst, $dst\n"
+    "done:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1 in XMM and memory
-instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
+instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
   predicate(UseSSE>=2);
-  match(Set dst (CmpD3 src1 (LoadD mem)));
+  match(Set dst (CmpD3 src1 (LoadD src2)));
   effect(KILL cr);
   ins_cost(275);
-  format %{ "COMISD $src1,$mem\n"
-          "\tMOV    $dst,0\t\t# do not blow flags\n"
-          "\tJP,s   nan\n"
-          "\tJEQ,s  exit\n"
-          "\tJA,s   inc\n"
-      "nan:\tDEC    $dst\n"
-          "\tJMP,s  exit\n"
-      "inc:\tINC    $dst\n"
-      "exit:"
-                %}
-  opcode(0x66, 0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
-             LdImmI(dst,0x0), CmpX_Result(dst));
-  ins_pipe( pipe_slow );
-%}
-
-
-instruct subD_reg(regD dst, regD src) %{
+  format %{ "UCOMISD $src1, $src2\n\t"
+            "MOV     $dst, #-1\n\t"
+            "JP,s    done\n\t"
+            "JB,s    done\n\t"
+            "SETNE   $dst\n\t"
+            "MOVZB   $dst, $dst\n"
+    "done:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct subDPR_reg(regDPR dst, regDPR src) %{
   predicate (UseSSE <=1);
   match(Set dst (SubD dst src));
 
@@ -10237,12 +9635,12 @@
             "DSUBp  $dst,ST" %}
   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
   ins_cost(150);
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
   predicate (UseSSE <=1);
   match(Set dst (RoundDouble (SubD src1 src2)));
   ins_cost(250);
@@ -10251,13 +9649,13 @@
             "DSUB   ST,$src1\n\t"
             "FSTP_D $dst\t# D-round" %}
   opcode(0xD8, 0x5);
-  ins_encode( Push_Reg_D(src2),
-              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+  ins_encode( Push_Reg_DPR(src2),
+              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 
 
-instruct subD_reg_mem(regD dst, memory src) %{
+instruct subDPR_reg_mem(regDPR dst, memory src) %{
   predicate (UseSSE <=1);
   match(Set dst (SubD dst (LoadD src)));
   ins_cost(150);
@@ -10270,7 +9668,7 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct absD_reg(regDPR1 dst, regDPR1 src) %{
+instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst (AbsD src));
   ins_cost(100);
@@ -10280,15 +9678,7 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct absXD_reg( regXD dst ) %{
-  predicate(UseSSE>=2);
-  match(Set dst (AbsD dst));
-  format %{ "ANDPD  $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
-  ins_encode( AbsXD_encoding(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct negD_reg(regDPR1 dst, regDPR1 src) %{
+instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate(UseSSE<=1);
   match(Set dst (NegD src));
   ins_cost(100);
@@ -10298,18 +9688,7 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct negXD_reg( regXD dst ) %{
-  predicate(UseSSE>=2);
-  match(Set dst (NegD dst));
-  format %{ "XORPD  $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
-  ins_encode %{
-     __ xorpd($dst$$XMMRegister,
-              ExternalAddress((address)double_signflip_pool));
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct addD_reg(regD dst, regD src) %{
+instruct addDPR_reg(regDPR dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (AddD dst src));
   format %{ "FLD    $src\n\t"
@@ -10317,13 +9696,13 @@
   size(4);
   ins_cost(150);
   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
 
-instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
   predicate(UseSSE<=1);
   match(Set dst (RoundDouble (AddD src1 src2)));
   ins_cost(250);
@@ -10332,13 +9711,13 @@
             "DADD   ST,$src1\n\t"
             "FSTP_D $dst\t# D-round" %}
   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
-  ins_encode( Push_Reg_D(src2),
-              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+  ins_encode( Push_Reg_DPR(src2),
+              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 
 
-instruct addD_reg_mem(regD dst, memory src) %{
+instruct addDPR_reg_mem(regDPR dst, memory src) %{
   predicate(UseSSE<=1);
   match(Set dst (AddD dst (LoadD src)));
   ins_cost(150);
@@ -10352,7 +9731,7 @@
 %}
 
 // add-to-memory
-instruct addD_mem_reg(memory dst, regD src) %{
+instruct addDPR_mem_reg(memory dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
   ins_cost(150);
@@ -10368,7 +9747,7 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct addD_reg_imm1(regD dst, immD1 con) %{
+instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
   predicate(UseSSE<=1);
   match(Set dst (AddD dst con));
   ins_cost(125);
@@ -10381,7 +9760,7 @@
   ins_pipe(fpu_reg);
 %}
 
-instruct addD_reg_imm(regD dst, immD con) %{
+instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
   match(Set dst (AddD dst con));
   ins_cost(200);
@@ -10394,7 +9773,7 @@
   ins_pipe(fpu_reg_mem);
 %}
 
-instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
+instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
   match(Set dst (RoundDouble (AddD src con)));
   ins_cost(200);
@@ -10409,124 +9788,14 @@
   ins_pipe(fpu_mem_reg_con);
 %}
 
-// Add two double precision floating point values in xmm
-instruct addXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (AddD dst src));
-  format %{ "ADDSD  $dst,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct addXD_imm(regXD dst, immXD con) %{
-  predicate(UseSSE>=2);
-  match(Set dst (AddD dst con));
-  format %{ "ADDSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
-  ins_encode %{
-    __ addsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct addXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (AddD dst (LoadD mem)));
-  format %{ "ADDSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Sub two double precision floating point values in xmm
-instruct subXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SubD dst src));
-  format %{ "SUBSD  $dst,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct subXD_imm(regXD dst, immXD con) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SubD dst con));
-  format %{ "SUBSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
-  ins_encode %{
-    __ subsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct subXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SubD dst (LoadD mem)));
-  format %{ "SUBSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Mul two double precision floating point values in xmm
-instruct mulXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MulD dst src));
-  format %{ "MULSD  $dst,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct mulXD_imm(regXD dst, immXD con) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MulD dst con));
-  format %{ "MULSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
-  ins_encode %{
-    __ mulsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MulD dst (LoadD mem)));
-  format %{ "MULSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Div two double precision floating point values in xmm
-instruct divXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (DivD dst src));
-  format %{ "DIVSD  $dst,$src" %}
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct divXD_imm(regXD dst, immXD con) %{
-  predicate(UseSSE>=2);
-  match(Set dst (DivD dst con));
-  format %{ "DIVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
-  ins_encode %{
-    __ divsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct divXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (DivD dst (LoadD mem)));
-  format %{ "DIVSD  $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-
-instruct mulD_reg(regD dst, regD src) %{
+instruct mulDPR_reg(regDPR dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (MulD dst src));
   format %{ "FLD    $src\n\t"
             "DMULp  $dst,ST" %}
   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
   ins_cost(150);
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
@@ -10539,7 +9808,7 @@
 // multiply scaled arg1 by arg2
 // rescale product by 2^(15360)
 //
-instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
+instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
   match(Set dst (MulD dst src));
   ins_cost(1);   // Select this instruction for all strict FP double multiplies
@@ -10552,13 +9821,13 @@
             "DMULp  $dst,ST\n\t" %}
   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
   ins_encode( strictfp_bias1(dst),
-              Push_Reg_D(src),
+              Push_Reg_DPR(src),
               OpcP, RegOpc(dst),
               strictfp_bias2(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct mulD_reg_imm(regD dst, immD con) %{
+instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
   match(Set dst (MulD dst con));
   ins_cost(200);
@@ -10572,7 +9841,7 @@
 %}
 
 
-instruct mulD_reg_mem(regD dst, memory src) %{
+instruct mulDPR_reg_mem(regDPR dst, memory src) %{
   predicate( UseSSE<=1 );
   match(Set dst (MulD dst (LoadD src)));
   ins_cost(200);
@@ -10586,7 +9855,7 @@
 
 //
 // Cisc-alternate to reg-reg multiply
-instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
+instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
   predicate( UseSSE<=1 );
   match(Set dst (MulD src (LoadD mem)));
   ins_cost(250);
@@ -10595,17 +9864,17 @@
             "FSTP_D $dst" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
-              OpcReg_F(src),
-              Pop_Reg_D(dst) );
+              OpcReg_FPR(src),
+              Pop_Reg_DPR(dst) );
   ins_pipe( fpu_reg_reg_mem );
 %}
 
 
-// MACRO3 -- addD a mulD
+// MACRO3 -- addDPR a mulDPR
 // This instruction is a '2-address' instruction in that the result goes
 // back to src2.  This eliminates a move from the macro; possibly the
 // register allocator will have to add it back (and maybe not).
-instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
+instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
   predicate( UseSSE<=1 );
   match(Set src2 (AddD (MulD src0 src1) src2));
   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
@@ -10613,29 +9882,29 @@
             "DADDp  $src2,ST" %}
   ins_cost(250);
   opcode(0xDD); /* LoadD DD /0 */
-  ins_encode( Push_Reg_F(src0),
+  ins_encode( Push_Reg_FPR(src0),
               FMul_ST_reg(src1),
               FAddP_reg_ST(src2) );
   ins_pipe( fpu_reg_reg_reg );
 %}
 
 
-// MACRO3 -- subD a mulD
-instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
+// MACRO3 -- subDPR a mulDPR
+instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
   predicate( UseSSE<=1 );
   match(Set src2 (SubD (MulD src0 src1) src2));
   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
             "DMUL   ST,$src1\n\t"
             "DSUBRp $src2,ST" %}
   ins_cost(250);
-  ins_encode( Push_Reg_F(src0),
+  ins_encode( Push_Reg_FPR(src0),
               FMul_ST_reg(src1),
               Opcode(0xDE), Opc_plus(0xE0,src2));
   ins_pipe( fpu_reg_reg_reg );
 %}
 
 
-instruct divD_reg(regD dst, regD src) %{
+instruct divDPR_reg(regDPR dst, regDPR src) %{
   predicate( UseSSE<=1 );
   match(Set dst (DivD dst src));
 
@@ -10643,7 +9912,7 @@
             "FDIVp  $dst,ST" %}
   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
   ins_cost(150);
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
@@ -10656,7 +9925,7 @@
 // divide scaled dividend by divisor
 // rescale quotient by 2^(15360)
 //
-instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
+instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst (DivD dst src));
   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
@@ -10670,13 +9939,13 @@
             "DMULp  $dst,ST\n\t" %}
   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
   ins_encode( strictfp_bias1(dst),
-              Push_Reg_D(src),
+              Push_Reg_DPR(src),
               OpcP, RegOpc(dst),
               strictfp_bias2(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
   match(Set dst (RoundDouble (DivD src1 src2)));
 
@@ -10684,27 +9953,27 @@
             "FDIV   ST,$src2\n\t"
             "FSTP_D $dst\t# D-round" %}
   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
-  ins_encode( Push_Reg_D(src1),
-              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
+  ins_encode( Push_Reg_DPR(src1),
+              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 
 
-instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
+instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (ModD dst src));
-  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 
   format %{ "DMOD   $dst,$src" %}
   ins_cost(250);
-  ins_encode(Push_Reg_Mod_D(dst, src),
-              emitModD(),
-              Push_Result_Mod_D(src),
-              Pop_Reg_D(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
+  ins_encode(Push_Reg_Mod_DPR(dst, src),
+              emitModDPR(),
+              Push_Result_Mod_DPR(src),
+              Pop_Reg_DPR(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE>=2);
   match(Set dst (ModD src0 src1));
   effect(KILL rax, KILL cr);
@@ -10725,11 +9994,11 @@
           "\tFSTP   ST0\t # Restore FPU Stack"
     %}
   ins_cost(250);
-  ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
-  ins_pipe( pipe_slow );
-%}
-
-instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
+  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
+  ins_pipe( pipe_slow );
+%}
+
+instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst (SinD src));
   ins_cost(1800);
@@ -10739,18 +10008,18 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
+instruct sinD_reg(regD dst, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst (SinD dst));
-  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
   ins_cost(1800);
   format %{ "DSIN   $dst" %}
   opcode(0xD9, 0xFE);
-  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
+  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst (CosD src));
   ins_cost(1800);
@@ -10760,18 +10029,18 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
+instruct cosD_reg(regD dst, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst (CosD dst));
-  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
   ins_cost(1800);
   format %{ "DCOS   $dst" %}
   opcode(0xD9, 0xFF);
-  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
+  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   match(Set dst(TanD src));
   format %{ "DTAN   $dst" %}
@@ -10780,50 +10049,50 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
+instruct tanD_reg(regD dst, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst(TanD dst));
-  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
   format %{ "DTAN   $dst" %}
-  ins_encode( Push_SrcXD(dst),
+  ins_encode( Push_SrcD(dst),
               Opcode(0xD9), Opcode(0xF2),    // fptan
               Opcode(0xDD), Opcode(0xD8),   // fstp st
-              Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct atanD_reg(regD dst, regD src) %{
+              Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct atanDPR_reg(regDPR dst, regDPR src) %{
   predicate (UseSSE<=1);
   match(Set dst(AtanD dst src));
   format %{ "DATA   $dst,$src" %}
   opcode(0xD9, 0xF3);
-  ins_encode( Push_Reg_D(src),
+  ins_encode( Push_Reg_DPR(src),
               OpcP, OpcS, RegOpc(dst) );
   ins_pipe( pipe_slow );
 %}
 
-instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst(AtanD dst src));
-  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
   format %{ "DATA   $dst,$src" %}
   opcode(0xD9, 0xF3);
-  ins_encode( Push_SrcXD(src),
-              OpcP, OpcS, Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct sqrtD_reg(regD dst, regD src) %{
+  ins_encode( Push_SrcD(src),
+              OpcP, OpcS, Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
   predicate (UseSSE<=1);
   match(Set dst (SqrtD src));
   format %{ "DSQRT  $dst,$src" %}
   opcode(0xFA, 0xD9);
-  ins_encode( Push_Reg_D(src),
-              OpcS, OpcP, Pop_Reg_D(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+  ins_encode( Push_Reg_DPR(src),
+              OpcS, OpcP, Pop_Reg_DPR(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
   predicate (UseSSE<=1);
   match(Set Y (PowD X Y));  // Raise X to the Yth power
   effect(KILL rax, KILL rbx, KILL rcx);
@@ -10852,14 +10121,14 @@
             "ADD    ESP,8"
              %}
   ins_encode( push_stack_temp_qword,
-              Push_Reg_D(X),
+              Push_Reg_DPR(X),
               Opcode(0xD9), Opcode(0xF1),   // fyl2x
               pow_exp_core_encoding,
               pop_stack_temp_qword);
   ins_pipe( pipe_slow );
 %}
 
-instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
+instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
   predicate (UseSSE>=2);
   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
   effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
@@ -10897,12 +10166,12 @@
               push_xmm_to_fpr1(src0),
               Opcode(0xD9), Opcode(0xF1),   // fyl2x
               pow_exp_core_encoding,
-              Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-
-instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+              Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
   predicate (UseSSE<=1);
   match(Set dpr1 (ExpD dpr1));
   effect(KILL rax, KILL rbx, KILL rcx);
@@ -10938,7 +10207,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
   predicate (UseSSE>=2);
   match(Set dst (ExpD src));
   effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
@@ -10969,17 +10238,17 @@
             "MOVSD  $dst,[ESP]\n\t"
             "ADD    ESP,8"
              %}
-  ins_encode( Push_SrcXD(src),
+  ins_encode( Push_SrcD(src),
               Opcode(0xD9), Opcode(0xEA),   // fldl2e
               Opcode(0xDE), Opcode(0xC9),   // fmulp
               pow_exp_core_encoding,
-              Push_ResultXD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-
-
-instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
+              Push_ResultD(dst) );
+  ins_pipe( pipe_slow );
+%}
+
+
+
+instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   // The source Double operand on FPU stack
   match(Set dst (Log10D src));
@@ -10997,7 +10266,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   effect(KILL cr);
   match(Set dst (Log10D src));
@@ -11007,14 +10276,14 @@
             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
          %}
   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
-              Push_SrcXD(src),
+              Push_SrcD(src),
               Opcode(0xD9), Opcode(0xF1),   // fyl2x
-              Push_ResultXD(dst));
-
-  ins_pipe( pipe_slow );
-%}
-
-instruct logD_reg(regDPR1 dst, regDPR1 src) %{
+              Push_ResultD(dst));
+
+  ins_pipe( pipe_slow );
+%}
+
+instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
   predicate (UseSSE<=1);
   // The source Double operand on FPU stack
   match(Set dst (LogD src));
@@ -11032,7 +10301,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   effect(KILL cr);
   // The source and result Double operands in XMM registers
@@ -11043,9 +10312,9 @@
             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
          %}
   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
-              Push_SrcXD(src),
+              Push_SrcD(src),
               Opcode(0xD9), Opcode(0xF1),   // fyl2x
-              Push_ResultXD(dst));
+              Push_ResultD(dst));
   ins_pipe( pipe_slow );
 %}
 
@@ -11066,7 +10335,7 @@
 //   exit:
 
 // P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
+instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
   predicate(VM_Version::supports_cmov() && UseSSE == 0);
   match(Set cr (CmpF src1 src2));
   effect(KILL rax);
@@ -11078,27 +10347,27 @@
             "SAHF\n"
      "exit:\tNOP               // avoid branch to branch" %}
   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               cmpF_P6_fixup );
   ins_pipe( pipe_slow );
 %}
 
-instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
+instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
   predicate(VM_Version::supports_cmov() && UseSSE == 0);
   match(Set cr (CmpF src1 src2));
   ins_cost(100);
   format %{ "FLD    $src1\n\t"
             "FUCOMIP ST,$src2  // P6 instruction" %}
   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2));
   ins_pipe( pipe_slow );
 %}
 
 
 // Compare & branch
-instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
+instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
   predicate(UseSSE == 0);
   match(Set cr (CmpF src1 src2));
   effect(KILL rax);
@@ -11111,328 +10380,190 @@
             "MOV    AH,1\t# unordered treat as LT\n"
     "flags:\tSAHF" %}
   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               fpu_flags);
   ins_pipe( pipe_slow );
 %}
 
 // Compare vs zero into -1,0,1
-instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE == 0);
   match(Set dst (CmpF3 src1 zero));
   effect(KILL cr, KILL rax);
   ins_cost(280);
   format %{ "FTSTF  $dst,$src1" %}
   opcode(0xE4, 0xD9);
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcS, OpcP, PopFPU,
               CmpF_Result(dst));
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1
-instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE == 0);
   match(Set dst (CmpF3 src1 src2));
   effect(KILL cr, KILL rax);
   ins_cost(300);
   format %{ "FCMPF  $dst,$src1,$src2" %}
   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_D(src1),
+  ins_encode( Push_Reg_DPR(src1),
               OpcP, RegOpc(src2),
               CmpF_Result(dst));
   ins_pipe( pipe_slow );
 %}
 
 // float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
+instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
   predicate(UseSSE>=1);
-  match(Set cr (CmpF dst src));
-  effect(KILL rax);
+  match(Set cr (CmpF src1 src2));
   ins_cost(145);
-  format %{ "COMISS $dst,$src\n"
-          "\tJNP    exit\n"
-          "\tMOV    ah,1       // saw a NaN, set CF\n"
-          "\tSAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
+  format %{ "UCOMISS $src1,$src2\n\t"
+            "JNP,s   exit\n\t"
+            "PUSHF\t# saw NaN, set CF\n\t"
+            "AND     [rsp], #0xffffff2b\n\t"
+            "POPF\n"
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
   predicate(UseSSE>=1);
-  match(Set cr (CmpF dst src));
+  match(Set cr (CmpF src1 src2));
   ins_cost(100);
-  format %{ "COMISS $dst,$src" %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegReg(dst, src));
+  format %{ "UCOMISS $src1,$src2" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
+instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
   predicate(UseSSE>=1);
-  match(Set cr (CmpF dst (LoadF src)));
-  effect(KILL rax);
+  match(Set cr (CmpF src1 (LoadF src2)));
   ins_cost(165);
-  format %{ "COMISS $dst,$src\n"
-          "\tJNP    exit\n"
-          "\tMOV    ah,1       // saw a NaN, set CF\n"
-          "\tSAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
+  format %{ "UCOMISS $src1,$src2\n\t"
+            "JNP,s   exit\n\t"
+            "PUSHF\t# saw NaN, set CF\n\t"
+            "AND     [rsp], #0xffffff2b\n\t"
+            "POPF\n"
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
   predicate(UseSSE>=1);
-  match(Set cr (CmpF dst (LoadF src)));
+  match(Set cr (CmpF src1 (LoadF src2)));
   ins_cost(100);
-  format %{ "COMISS $dst,$src" %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegMem(dst, src));
+  format %{ "UCOMISS $src1,$src2" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1 in XMM
-instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
+instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
   predicate(UseSSE>=1);
   match(Set dst (CmpF3 src1 src2));
   effect(KILL cr);
   ins_cost(255);
-  format %{ "XOR    $dst,$dst\n"
-          "\tCOMISS $src1,$src2\n"
-          "\tJP,s   nan\n"
-          "\tJEQ,s  exit\n"
-          "\tJA,s   inc\n"
-      "nan:\tDEC    $dst\n"
-          "\tJMP,s  exit\n"
-      "inc:\tINC    $dst\n"
-      "exit:"
-                %}
-  opcode(0x0F, 0x2F);
-  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
+  format %{ "UCOMISS $src1, $src2\n\t"
+            "MOV     $dst, #-1\n\t"
+            "JP,s    done\n\t"
+            "JB,s    done\n\t"
+            "SETNE   $dst\n\t"
+            "MOVZB   $dst, $dst\n"
+    "done:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Compare into -1,0,1 in XMM and memory
-instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
+instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
   predicate(UseSSE>=1);
-  match(Set dst (CmpF3 src1 (LoadF mem)));
+  match(Set dst (CmpF3 src1 (LoadF src2)));
   effect(KILL cr);
   ins_cost(275);
-  format %{ "COMISS $src1,$mem\n"
-          "\tMOV    $dst,0\t\t# do not blow flags\n"
-          "\tJP,s   nan\n"
-          "\tJEQ,s  exit\n"
-          "\tJA,s   inc\n"
-      "nan:\tDEC    $dst\n"
-          "\tJMP,s  exit\n"
-      "inc:\tINC    $dst\n"
-      "exit:"
-                %}
-  opcode(0x0F, 0x2F);
-  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
+  format %{ "UCOMISS $src1, $src2\n\t"
+            "MOV     $dst, #-1\n\t"
+            "JP,s    done\n\t"
+            "JB,s    done\n\t"
+            "SETNE   $dst\n\t"
+            "MOVZB   $dst, $dst\n"
+    "done:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Spill to obtain 24-bit precision
-instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (SubF src1 src2));
 
   format %{ "FSUB   $dst,$src1 - $src2" %}
   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
-  ins_encode( Push_Reg_F(src1),
-              OpcReg_F(src2),
-              Pop_Mem_F(dst) );
+  ins_encode( Push_Reg_FPR(src1),
+              OpcReg_FPR(src2),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 //
 // This instruction does not round to 24-bits
-instruct subF_reg(regF dst, regF src) %{
+instruct subFPR_reg(regFPR dst, regFPR src) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (SubF dst src));
 
   format %{ "FSUB   $dst,$src" %}
   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
-  ins_encode( Push_Reg_F(src),
+  ins_encode( Push_Reg_FPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
 // Spill to obtain 24-bit precision
-instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 src2));
 
   format %{ "FADD   $dst,$src1,$src2" %}
   opcode(0xD8, 0x0); /* D8 C0+i */
-  ins_encode( Push_Reg_F(src2),
-              OpcReg_F(src1),
-              Pop_Mem_F(dst) );
+  ins_encode( Push_Reg_FPR(src2),
+              OpcReg_FPR(src1),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 //
 // This instruction does not round to 24-bits
-instruct addF_reg(regF dst, regF src) %{
+instruct addFPR_reg(regFPR dst, regFPR src) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (AddF dst src));
 
   format %{ "FLD    $src\n\t"
             "FADDp  $dst,ST" %}
   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
-  ins_encode( Push_Reg_F(src),
+  ins_encode( Push_Reg_FPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
-// Add two single precision floating point values in xmm
-instruct addX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (AddF dst src));
-  format %{ "ADDSS  $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct addX_imm(regX dst, immXF con) %{
-  predicate(UseSSE>=1);
-  match(Set dst (AddF dst con));
-  format %{ "ADDSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
-  ins_encode %{
-    __ addss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct addX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (AddF dst (LoadF mem)));
-  format %{ "ADDSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Subtract two single precision floating point values in xmm
-instruct subX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (SubF dst src));
-  format %{ "SUBSS  $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct subX_imm(regX dst, immXF con) %{
-  predicate(UseSSE>=1);
-  match(Set dst (SubF dst con));
-  format %{ "SUBSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
-  ins_encode %{
-    __ subss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct subX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (SubF dst (LoadF mem)));
-  format %{ "SUBSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Multiply two single precision floating point values in xmm
-instruct mulX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (MulF dst src));
-  format %{ "MULSS  $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct mulX_imm(regX dst, immXF con) %{
-  predicate(UseSSE>=1);
-  match(Set dst (MulF dst con));
-  format %{ "MULSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
-  ins_encode %{
-    __ mulss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (MulF dst (LoadF mem)));
-  format %{ "MULSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Divide two single precision floating point values in xmm
-instruct divX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (DivF dst src));
-  format %{ "DIVSS  $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct divX_imm(regX dst, immXF con) %{
-  predicate(UseSSE>=1);
-  match(Set dst (DivF dst con));
-  format %{ "DIVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
-  ins_encode %{
-    __ divss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct divX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (DivF dst (LoadF mem)));
-  format %{ "DIVSS  $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Get the square root of a single precision floating point values in xmm
-instruct sqrtX_reg(regX dst, regX src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
-  format %{ "SQRTSS $dst,$src" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct sqrtX_mem(regX dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
-  format %{ "SQRTSS $dst,$mem" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
-  ins_pipe( pipe_slow );
-%}
-
-// Get the square root of a double precision floating point values in xmm
-instruct sqrtXD_reg(regXD dst, regXD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SqrtD src));
-  format %{ "SQRTSD $dst,$src" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct sqrtXD_mem(regXD dst, memory mem) %{
-  predicate(UseSSE>=2);
-  match(Set dst (SqrtD (LoadD mem)));
-  format %{ "SQRTSD $dst,$mem" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
-  ins_pipe( pipe_slow );
-%}
-
-instruct absF_reg(regFPR1 dst, regFPR1 src) %{
+instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
   predicate(UseSSE==0);
   match(Set dst (AbsF src));
   ins_cost(100);
@@ -11442,15 +10573,7 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct absX_reg(regX dst ) %{
-  predicate(UseSSE>=1);
-  match(Set dst (AbsF dst));
-  format %{ "ANDPS  $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
-  ins_encode( AbsXF_encoding(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct negF_reg(regFPR1 dst, regFPR1 src) %{
+instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
   predicate(UseSSE==0);
   match(Set dst (NegF src));
   ins_cost(100);
@@ -11460,17 +10583,9 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct negX_reg( regX dst ) %{
-  predicate(UseSSE>=1);
-  match(Set dst (NegF dst));
-  format %{ "XORPS  $dst,[0x80000000]\t# CHS F by sign flipping" %}
-  ins_encode( NegXF_encoding(dst));
-  ins_pipe( pipe_slow );
-%}
-
-// Cisc-alternate to addF_reg
+// Cisc-alternate to addFPR_reg
 // Spill to obtain 24-bit precision
-instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 (LoadF src2)));
 
@@ -11479,14 +10594,14 @@
             "FSTP_S $dst" %}
   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcReg_F(src1),
-              Pop_Mem_F(dst) );
+              OpcReg_FPR(src1),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_mem );
 %}
 //
-// Cisc-alternate to addF_reg
+// Cisc-alternate to addFPR_reg
 // This instruction does not round to 24-bits
-instruct addF_reg_mem(regF dst, memory src) %{
+instruct addFPR_reg_mem(regFPR dst, memory src) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (AddF dst (LoadF src)));
 
@@ -11499,21 +10614,21 @@
 
 // // Following two instructions for _222_mpegaudio
 // Spill to obtain 24-bit precision
-instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
+instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 src2));
 
   format %{ "FADD   $dst,$src1,$src2" %}
   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
-              OpcReg_F(src2),
-              Pop_Mem_F(dst) );
+              OpcReg_FPR(src2),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_mem );
 %}
 
 // Cisc-spill variant
 // Spill to obtain 24-bit precision
-instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
+instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 (LoadF src2)));
 
@@ -11522,12 +10637,12 @@
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
               set_instruction_start,
               OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_F(dst) );
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_mem_mem );
 %}
 
 // Spill to obtain 24-bit precision
-instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src1 src2));
 
@@ -11536,13 +10651,13 @@
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
               set_instruction_start,
               OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_F(dst) );
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_mem_mem );
 %}
 
 
 // Spill to obtain 24-bit precision
-instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
+instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src con));
   format %{ "FLD    $src\n\t"
@@ -11557,7 +10672,7 @@
 %}
 //
 // This instruction does not round to 24-bits
-instruct addF_reg_imm(regF dst, regF src, immF con) %{
+instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (AddF src con));
   format %{ "FLD    $src\n\t"
@@ -11572,7 +10687,7 @@
 %}
 
 // Spill to obtain 24-bit precision
-instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 src2));
 
@@ -11580,14 +10695,14 @@
             "FMUL   $src2\n\t"
             "FSTP_S $dst"  %}
   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
-  ins_encode( Push_Reg_F(src1),
-              OpcReg_F(src2),
-              Pop_Mem_F(dst) );
+  ins_encode( Push_Reg_FPR(src1),
+              OpcReg_FPR(src2),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 //
 // This instruction does not round to 24-bits
-instruct mulF_reg(regF dst, regF src1, regF src2) %{
+instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 src2));
 
@@ -11595,16 +10710,16 @@
             "FMUL   $src2\n\t"
             "FSTP_S $dst"  %}
   opcode(0xD8, 0x1); /* D8 C8+i */
-  ins_encode( Push_Reg_F(src2),
-              OpcReg_F(src1),
-              Pop_Reg_F(dst) );
+  ins_encode( Push_Reg_FPR(src2),
+              OpcReg_FPR(src1),
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_reg_reg );
 %}
 
 
 // Spill to obtain 24-bit precision
 // Cisc-alternate to reg-reg multiply
-instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 (LoadF src2)));
 
@@ -11613,27 +10728,27 @@
             "FSTP_S $dst"  %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcReg_F(src1),
-              Pop_Mem_F(dst) );
+              OpcReg_FPR(src1),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_mem );
 %}
 //
 // This instruction does not round to 24-bits
 // Cisc-alternate to reg-reg multiply
-instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
+instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 (LoadF src2)));
 
   format %{ "FMUL   $dst,$src1,$src2" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcReg_F(src1),
-              Pop_Reg_F(dst) );
+              OpcReg_FPR(src1),
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_reg_mem );
 %}
 
 // Spill to obtain 24-bit precision
-instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src1 src2));
 
@@ -11642,12 +10757,12 @@
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
               set_instruction_start,
               OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_F(dst) );
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_mem_mem );
 %}
 
 // Spill to obtain 24-bit precision
-instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
+instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src con));
 
@@ -11663,7 +10778,7 @@
 %}
 //
 // This instruction does not round to 24-bits
-instruct mulF_reg_imm(regF dst, regF src, immF con) %{
+instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (MulF src con));
 
@@ -11680,9 +10795,9 @@
 
 
 //
-// MACRO1 -- subsume unshared load into mulF
+// MACRO1 -- subsume unshared load into mulFPR
 // This instruction does not round to 24-bits
-instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
+instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (MulF (LoadF mem1) src));
 
@@ -11691,36 +10806,36 @@
             "FSTP   $dst" %}
   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
-              OpcReg_F(src),
-              Pop_Reg_F(dst) );
+              OpcReg_FPR(src),
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_reg_mem );
 %}
 //
-// MACRO2 -- addF a mulF which subsumed an unshared load
+// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
 // This instruction does not round to 24-bits
-instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
+instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
   ins_cost(95);
 
   format %{ "FLD    $mem1     ===MACRO2===\n\t"
-            "FMUL   ST,$src1  subsume mulF left load\n\t"
+            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
             "FADD   ST,$src2\n\t"
             "FSTP   $dst" %}
   opcode(0xD9); /* LoadF D9 /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
               FMul_ST_reg(src1),
               FAdd_ST_reg(src2),
-              Pop_Reg_F(dst) );
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_mem_reg_reg );
 %}
 
-// MACRO3 -- addF a mulF
+// MACRO3 -- addFPR a mulFPR
 // This instruction does not round to 24-bits.  It is a '2-address'
 // instruction in that the result goes back to src2.  This eliminates
 // a move from the macro; possibly the register allocator will have
 // to add it back (and maybe not).
-instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
+instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set src2 (AddF (MulF src0 src1) src2));
 
@@ -11728,15 +10843,15 @@
             "FMUL   ST,$src1\n\t"
             "FADDP  $src2,ST" %}
   opcode(0xD9); /* LoadF D9 /0 */
-  ins_encode( Push_Reg_F(src0),
+  ins_encode( Push_Reg_FPR(src0),
               FMul_ST_reg(src1),
               FAddP_reg_ST(src2) );
   ins_pipe( fpu_reg_reg_reg );
 %}
 
-// MACRO4 -- divF subF
+// MACRO4 -- divFPR subFPR
 // This instruction does not round to 24-bits
-instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
+instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (DivF (SubF src2 src1) src3));
 
@@ -11745,67 +10860,67 @@
             "FDIV   ST,$src3\n\t"
             "FSTP  $dst" %}
   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
-  ins_encode( Push_Reg_F(src2),
-              subF_divF_encode(src1,src3),
-              Pop_Reg_F(dst) );
+  ins_encode( Push_Reg_FPR(src2),
+              subFPR_divFPR_encode(src1,src3),
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_reg_reg_reg );
 %}
 
 // Spill to obtain 24-bit precision
-instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (DivF src1 src2));
 
   format %{ "FDIV   $dst,$src1,$src2" %}
   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
-  ins_encode( Push_Reg_F(src1),
-              OpcReg_F(src2),
-              Pop_Mem_F(dst) );
+  ins_encode( Push_Reg_FPR(src1),
+              OpcReg_FPR(src2),
+              Pop_Mem_FPR(dst) );
   ins_pipe( fpu_mem_reg_reg );
 %}
 //
 // This instruction does not round to 24-bits
-instruct divF_reg(regF dst, regF src) %{
+instruct divFPR_reg(regFPR dst, regFPR src) %{
   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (DivF dst src));
 
   format %{ "FDIV   $dst,$src" %}
   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
-  ins_encode( Push_Reg_F(src),
+  ins_encode( Push_Reg_FPR(src),
               OpcP, RegOpc(dst) );
   ins_pipe( fpu_reg_reg );
 %}
 
 
 // Spill to obtain 24-bit precision
-instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
+instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (ModF src1 src2));
-  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 
   format %{ "FMOD   $dst,$src1,$src2" %}
-  ins_encode( Push_Reg_Mod_D(src1, src2),
-              emitModD(),
-              Push_Result_Mod_D(src2),
-              Pop_Mem_F(dst));
+  ins_encode( Push_Reg_Mod_DPR(src1, src2),
+              emitModDPR(),
+              Push_Result_Mod_DPR(src2),
+              Pop_Mem_FPR(dst));
   ins_pipe( pipe_slow );
 %}
 //
 // This instruction does not round to 24-bits
-instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
+instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (ModF dst src));
-  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 
   format %{ "FMOD   $dst,$src" %}
-  ins_encode(Push_Reg_Mod_D(dst, src),
-              emitModD(),
-              Push_Result_Mod_D(src),
-              Pop_Reg_F(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
+  ins_encode(Push_Reg_Mod_DPR(dst, src),
+              emitModDPR(),
+              Push_Result_Mod_DPR(src),
+              Pop_Reg_FPR(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE>=1);
   match(Set dst (ModF src0 src1));
   effect(KILL rax, KILL cr);
@@ -11825,7 +10940,7 @@
           "\tFSTP   ST0\t # Restore FPU Stack"
     %}
   ins_cost(250);
-  ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
+  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
   ins_pipe( pipe_slow );
 %}
 
@@ -11833,26 +10948,26 @@
 //----------Arithmetic Conversion Instructions---------------------------------
 // The conversions operations are all Alpha sorted.  Please keep it that way!
 
-instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
+instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (RoundFloat src));
   ins_cost(125);
   format %{ "FST_S  $dst,$src\t# F-round" %}
-  ins_encode( Pop_Mem_Reg_F(dst, src) );
+  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
   ins_pipe( fpu_mem_reg );
 %}
 
-instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
+instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (RoundDouble src));
   ins_cost(125);
   format %{ "FST_D  $dst,$src\t# D-round" %}
-  ins_encode( Pop_Mem_Reg_D(dst, src) );
+  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
   ins_pipe( fpu_mem_reg );
 %}
 
 // Force rounding to 24-bit precision and 6-bit exponent
-instruct convD2F_reg(stackSlotF dst, regD src) %{
+instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
   predicate(UseSSE==0);
   match(Set dst (ConvD2F src));
   format %{ "FST_S  $dst,$src\t# F-round" %}
@@ -11862,7 +10977,7 @@
 %}
 
 // Force rounding to 24-bit precision and 6-bit exponent
-instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
+instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
   predicate(UseSSE==1);
   match(Set dst (ConvD2F src));
   effect( KILL cr );
@@ -11870,29 +10985,40 @@
             "FST_S  [ESP],$src\t# F-round\n\t"
             "MOVSS  $dst,[ESP]\n\t"
             "ADD ESP,4" %}
-  ins_encode( D2X_encoding(dst, src) );
+  ins_encode %{
+    __ subptr(rsp, 4);
+    if ($src$$reg != FPR1L_enc) {
+      __ fld_s($src$$reg-1);
+      __ fstp_s(Address(rsp, 0));
+    } else {
+      __ fst_s(Address(rsp, 0));
+    }
+    __ movflt($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 4);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Force rounding double precision to single precision
-instruct convXD2X_reg(regX dst, regXD src) %{
+instruct convD2F_reg(regF dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (ConvD2F src));
   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convF2D_reg_reg(regD dst, regF src) %{
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (ConvF2D src));
   format %{ "FST_S  $dst,$src\t# D-round" %}
-  ins_encode( Pop_Reg_Reg_D(dst, src));
+  ins_encode( Pop_Reg_Reg_DPR(dst, src));
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct convF2D_reg(stackSlotD dst, regF src) %{
+instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
   predicate(UseSSE==1);
   match(Set dst (ConvF2D src));
   format %{ "FST_D  $dst,$src\t# D-round" %}
@@ -11901,7 +11027,7 @@
   %}
 %}
 
-instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
+instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
   predicate(UseSSE==1);
   match(Set dst (ConvF2D src));
   effect( KILL cr );
@@ -11910,21 +11036,28 @@
             "FLD_S  [ESP]\n\t"
             "ADD    ESP,4\n\t"
             "FSTP   $dst\t# D-round" %}
-  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convX2XD_reg(regXD dst, regX src) %{
+  ins_encode %{
+    __ subptr(rsp, 4);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ addptr(rsp, 4);
+    __ fstp_d($dst$$reg);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convF2D_reg(regD dst, regF src) %{
   predicate(UseSSE>=2);
   match(Set dst (ConvF2D src));
   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
-instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
+instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
   predicate(UseSSE<=1);
   match(Set dst (ConvD2I src));
   effect( KILL tmp, KILL cr );
@@ -11939,12 +11072,12 @@
             "FLD_D  $src\n\t"
             "CALL   d2i_wrapper\n"
       "fast:" %}
-  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
+  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
   ins_pipe( pipe_slow );
 %}
 
 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
-instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
+instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
   predicate(UseSSE>=2);
   match(Set dst (ConvD2I src));
   effect( KILL tmp, KILL cr );
@@ -11957,12 +11090,22 @@
             "ADD    ESP, 8\n\t"
             "CALL   d2i_wrapper\n"
       "fast:" %}
-  opcode(0x1); // double-precision conversion
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
+  ins_encode %{
+    Label fast;
+    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, fast);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ addptr(rsp, 8);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
+    __ bind(fast);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
   predicate(UseSSE<=1);
   match(Set dst (ConvD2L src));
   effect( KILL cr );
@@ -11980,12 +11123,12 @@
             "FLD    $src\n\t"
             "CALL   d2l_wrapper\n"
       "fast:" %}
-  ins_encode( Push_Reg_D(src),  D2L_encoding(src) );
+  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
   ins_pipe( pipe_slow );
 %}
 
 // XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
+instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
   predicate (UseSSE>=2);
   match(Set dst (ConvD2L src));
   effect( KILL cr );
@@ -12004,9 +11147,36 @@
             "SUB    ESP,8\n\t"
             "MOVSD  [ESP],$src\n\t"
             "FLD_D  [ESP]\n\t"
+            "ADD    ESP,8\n\t"
             "CALL   d2l_wrapper\n"
       "fast:" %}
-  ins_encode( XD2L_encoding(src) );
+  ins_encode %{
+    Label fast;
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
+    __ fistp_d(Address(rsp, 0));
+    // Restore the rounding mode, mask the exception
+    if (Compile::current()->in_24_bit_fp_mode()) {
+      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+    } else {
+      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
+    }
+    // Load the converted long, adjust CPU stack
+    __ pop(rax);
+    __ pop(rdx);
+    __ cmpl(rdx, 0x80000000);
+    __ jccb(Assembler::notEqual, fast);
+    __ testl(rax, rax);
+    __ jccb(Assembler::notEqual, fast);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+    __ addptr(rsp, 8);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
+    __ bind(fast);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -12016,7 +11186,7 @@
 // rounding mode to 'nearest'.  The hardware stores a flag value down
 // if we would overflow or converted a NAN; we check for this and
-// and go the slow path if needed.
+// go the slow path if needed.
-instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
+instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
   predicate(UseSSE==0);
   match(Set dst (ConvF2I src));
   effect( KILL tmp, KILL cr );
@@ -12031,13 +11201,13 @@
             "FLD    $src\n\t"
             "CALL   d2i_wrapper\n"
       "fast:" %}
-  // D2I_encoding works for F2I
-  ins_encode( Push_Reg_F(src), D2I_encoding(src) );
+  // DPR2I_encoding works for FPR2I
+  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
   ins_pipe( pipe_slow );
 %}
 
 // Convert a float in xmm to an int reg.
-instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
+instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
   predicate(UseSSE>=1);
   match(Set dst (ConvF2I src));
   effect( KILL tmp, KILL cr );
@@ -12050,12 +11220,22 @@
             "ADD    ESP, 4\n\t"
             "CALL   d2i_wrapper\n"
       "fast:" %}
-  opcode(0x0); // single-precision conversion
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
+  ins_encode %{
+    Label fast;
+    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, fast);
+    __ subptr(rsp, 4);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ addptr(rsp, 4);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
+    __ bind(fast);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
   predicate(UseSSE==0);
   match(Set dst (ConvF2L src));
   effect( KILL cr );
@@ -12073,13 +11253,13 @@
             "FLD    $src\n\t"
             "CALL   d2l_wrapper\n"
       "fast:" %}
-  // D2L_encoding works for F2L
-  ins_encode( Push_Reg_F(src), D2L_encoding(src) );
+  // DPR2L_encoding works for FPR2L
+  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
   ins_pipe( pipe_slow );
 %}
 
 // XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
+instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
   predicate (UseSSE>=1);
   match(Set dst (ConvF2L src));
   effect( KILL cr );
@@ -12101,39 +11281,67 @@
             "ADD    ESP,4\n\t"
             "CALL   d2l_wrapper\n"
       "fast:" %}
-  ins_encode( X2L_encoding(src) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct convI2D_reg(regD dst, stackSlotI src) %{
+  ins_encode %{
+    Label fast;
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
+    __ fistp_d(Address(rsp, 0));
+    // Restore the rounding mode, mask the exception
+    if (Compile::current()->in_24_bit_fp_mode()) {
+      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+    } else {
+      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
+    }
+    // Load the converted long, adjust CPU stack
+    __ pop(rax);
+    __ pop(rdx);
+    __ cmpl(rdx, 0x80000000);
+    __ jccb(Assembler::notEqual, fast);
+    __ testl(rax, rax);
+    __ jccb(Assembler::notEqual, fast);
+    __ subptr(rsp, 4);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_s(Address(rsp, 0));
+    __ addptr(rsp, 4);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
+    __ bind(fast);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
   predicate( UseSSE<=1 );
   match(Set dst (ConvI2D src));
   format %{ "FILD   $src\n\t"
             "FSTP   $dst" %}
   opcode(0xDB, 0x0);  /* DB /0 */
-  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
+  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct convI2XD_reg(regXD dst, eRegI src) %{
+instruct convI2D_reg(regD dst, eRegI src) %{
   predicate( UseSSE>=2 && !UseXmmI2D );
   match(Set dst (ConvI2D src));
   format %{ "CVTSI2SD $dst,$src" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convI2XD_mem(regXD dst, memory mem) %{
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convI2D_mem(regD dst, memory mem) %{
   predicate( UseSSE>=2 );
   match(Set dst (ConvI2D (LoadI mem)));
   format %{ "CVTSI2SD $dst,$mem" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convXI2XD_reg(regXD dst, eRegI src)
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct convXI2D_reg(regD dst, eRegI src)
 %{
   predicate( UseSSE>=2 && UseXmmI2D );
   match(Set dst (ConvI2D src));
@@ -12147,31 +11355,31 @@
   ins_pipe(pipe_slow); // XXX
 %}
 
-instruct convI2D_mem(regD dst, memory mem) %{
+instruct convI2DPR_mem(regDPR dst, memory mem) %{
   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2D (LoadI mem)));
   format %{ "FILD   $mem\n\t"
             "FSTP   $dst" %}
   opcode(0xDB);      /* DB /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_D(dst));
+              Pop_Reg_DPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
 // Convert a byte to a float; no rounding step needed.
-instruct conv24I2F_reg(regF dst, stackSlotI src) %{
+instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
   match(Set dst (ConvI2F src));
   format %{ "FILD   $src\n\t"
             "FSTP   $dst" %}
 
   opcode(0xDB, 0x0);  /* DB /0 */
-  ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
+  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
 // In 24-bit mode, force exponent rounding by storing back out
-instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
+instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2F src));
   ins_cost(200);
@@ -12179,12 +11387,12 @@
             "FSTP_S $dst" %}
   opcode(0xDB, 0x0);  /* DB /0 */
   ins_encode( Push_Mem_I(src),
-              Pop_Mem_F(dst));
+              Pop_Mem_FPR(dst));
   ins_pipe( fpu_mem_mem );
 %}
 
 // In 24-bit mode, force exponent rounding by storing back out
-instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
+instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2F (LoadI mem)));
   ins_cost(200);
@@ -12192,46 +11400,46 @@
             "FSTP_S $dst" %}
   opcode(0xDB);  /* DB /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Mem_F(dst));
+              Pop_Mem_FPR(dst));
   ins_pipe( fpu_mem_mem );
 %}
 
 // This instruction does not round to 24-bits
-instruct convI2F_reg(regF dst, stackSlotI src) %{
+instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2F src));
   format %{ "FILD   $src\n\t"
             "FSTP   $dst" %}
   opcode(0xDB, 0x0);  /* DB /0 */
   ins_encode( Push_Mem_I(src),
-              Pop_Reg_F(dst));
+              Pop_Reg_FPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
 // This instruction does not round to 24-bits
-instruct convI2F_mem(regF dst, memory mem) %{
+instruct convI2FPR_mem(regFPR dst, memory mem) %{
   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
   match(Set dst (ConvI2F (LoadI mem)));
   format %{ "FILD   $mem\n\t"
             "FSTP   $dst" %}
   opcode(0xDB);      /* DB /0 */
   ins_encode( OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_F(dst));
+              Pop_Reg_FPR(dst));
   ins_pipe( fpu_reg_mem );
 %}
 
 // Convert an int to a float in xmm; no rounding step needed.
-instruct convI2X_reg(regX dst, eRegI src) %{
+instruct convI2F_reg(regF dst, eRegI src) %{
   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
   match(Set dst (ConvI2F src));
   format %{ "CVTSI2SS $dst, $src" %}
-
-  opcode(0xF3, 0x0F, 0x2A);  /* F3 0F 2A /r */
-  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
-  ins_pipe( pipe_slow );
-%}
-
- instruct convXI2X_reg(regX dst, eRegI src)
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+ instruct convXI2F_reg(regF dst, eRegI src)
 %{
   predicate( UseSSE>=2 && UseXmmI2F );
   match(Set dst (ConvI2F src));
@@ -12280,7 +11488,7 @@
   ins_pipe( ialu_reg_reg_long );
 %}
 
-instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
+instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
   predicate (UseSSE<=1);
   match(Set dst (ConvL2D src));
   effect( KILL cr );
@@ -12290,11 +11498,11 @@
             "ADD    ESP,8\n\t"
             "FSTP_D $dst\t# D-round" %}
   opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double(src), Pop_Mem_D(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
+  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
   predicate (UseSSE>=2);
   match(Set dst (ConvL2D src));
   effect( KILL cr );
@@ -12305,11 +11513,11 @@
             "MOVSD  $dst,[ESP]\n\t"
             "ADD    ESP,8" %}
   opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double2(src), Push_ResultXD(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
+  ins_encode(convert_long_double2(src), Push_ResultD(dst));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
   predicate (UseSSE>=1);
   match(Set dst (ConvL2F src));
   effect( KILL cr );
@@ -12320,11 +11528,11 @@
             "MOVSS  $dst,[ESP]\n\t"
             "ADD    ESP,8" %}
   opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
+  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
+  ins_pipe( pipe_slow );
+%}
+
+instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
   match(Set dst (ConvL2F src));
   effect( KILL cr );
   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
@@ -12333,7 +11541,7 @@
             "ADD    ESP,8\n\t"
             "FSTP_S $dst\t# F-round" %}
   opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double(src), Pop_Mem_F(dst));
+  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
   ins_pipe( pipe_slow );
 %}
 
@@ -12351,40 +11559,45 @@
   effect( DEF dst, USE src );
   ins_cost(100);
   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
-  opcode(0x8B);
-  ins_encode( OpcP, RegMem(dst,src));
+  ins_encode %{
+    __ movl($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe( ialu_reg_mem );
 %}
 
-instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
+instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
   predicate(UseSSE==0);
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
 
   ins_cost(125);
   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
-  ins_encode( Pop_Mem_Reg_F(dst, src) );
+  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
   ins_pipe( fpu_mem_reg );
 %}
 
-instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
+instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
   predicate(UseSSE>=1);
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
 
   ins_cost(95);
   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
   ins_cost(85);
   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
-  ins_encode( MovX2I_reg(dst, src));
+  ins_encode %{
+    __ movdl($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -12394,13 +11607,14 @@
 
   ins_cost(100);
   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
-  opcode(0x89);
-  ins_encode( OpcPRegSS( dst, src ) );
+  ins_encode %{
+    __ movl(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe( ialu_mem_reg );
 %}
 
 
-instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
+instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
   predicate(UseSSE==0);
   match(Set dst (MoveI2F src));
   effect(DEF dst, USE src);
@@ -12410,29 +11624,33 @@
             "FSTP   $dst\t# MoveI2F_stack_reg" %}
   opcode(0xD9);               /* D9 /0, FLD m32real */
   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_F(dst) );
+              Pop_Reg_FPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
+instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
   predicate(UseSSE>=1);
   match(Set dst (MoveI2F src));
   effect( DEF dst, USE src );
 
   ins_cost(95);
   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
-  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveI2F src));
   effect( DEF dst, USE src );
 
   ins_cost(85);
   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
-  ins_encode( MovI2X_reg(dst, src) );
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -12448,29 +11666,30 @@
   ins_pipe( ialu_mem_long_reg );
 %}
 
-instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
+instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
   predicate(UseSSE<=1);
   match(Set dst (MoveD2L src));
   effect(DEF dst, USE src);
 
   ins_cost(125);
   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
-  ins_encode( Pop_Mem_Reg_D(dst, src) );
+  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
   ins_pipe( fpu_mem_reg );
 %}
 
-instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
+instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveD2L src));
   effect(DEF dst, USE src);
   ins_cost(95);
-
   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveD2L src));
   effect(DEF dst, USE src, TEMP tmp);
@@ -12478,7 +11697,11 @@
   format %{ "MOVD   $dst.lo,$src\n\t"
             "PSHUFLW $tmp,$src,0x4E\n\t"
             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
-  ins_encode( MovXD2L_reg(dst, src, tmp) );
+  ins_encode %{
+    __ movdl($dst$$Register, $src$$XMMRegister);
+    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
+    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -12495,7 +11718,7 @@
 %}
 
 
-instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
+instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
   predicate(UseSSE<=1);
   match(Set dst (MoveL2D src));
   effect(DEF dst, USE src);
@@ -12505,34 +11728,38 @@
             "FSTP   $dst\t# MoveL2D_stack_reg" %}
   opcode(0xDD);               /* DD /0, FLD m64real */
   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_D(dst) );
+              Pop_Reg_DPR(dst) );
   ins_pipe( fpu_reg_mem );
 %}
 
 
-instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
+instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
   match(Set dst (MoveL2D src));
   effect(DEF dst, USE src);
 
   ins_cost(95);
   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
-  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
   match(Set dst (MoveL2D src));
   effect(DEF dst, USE src);
 
   ins_cost(95);
   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
-  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveL2D src));
   effect(TEMP dst, USE src, TEMP tmp);
@@ -12540,149 +11767,192 @@
   format %{ "MOVD   $dst,$src.lo\n\t"
             "MOVD   $tmp,$src.hi\n\t"
             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
-  ins_encode( MovL2XD_reg(dst, src, tmp) );
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regXD dst, regXD src) %{
+instruct Repl8B_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate8B src));
   format %{ "MOVDQA  $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( pshufd_8x8(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+    }
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_eRegI(regXD dst, eRegI src) %{
+instruct Repl8B_eRegI(regD dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate8B src));
   format %{ "MOVD    $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regXD dst, immI0 zero) %{
+instruct Repl8B_immI0(regD dst, immI0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate8B zero));
   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
-// Replicate scalar to packed shore (2 byte) values in xmm
+// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_reg(regXD dst, regXD src) %{
+instruct Repl4S_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4S src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
-// Replicate scalar to packed shore (2 byte) values in xmm
+// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_eRegI(regXD dst, eRegI src) %{
+instruct Repl4S_eRegI(regD dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4S src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regXD dst, immI0 zero) %{
+instruct Repl4S_immI0(regD dst, immI0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4S zero));
   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regXD dst, regXD src) %{
+instruct Repl4C_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4C src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_eRegI(regXD dst, eRegI src) %{
+instruct Repl4C_eRegI(regD dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4C src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regXD dst, immI0 zero) %{
+instruct Repl4C_immI0(regD dst, immI0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate4C zero));
   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regXD dst, regXD src) %{
+instruct Repl2I_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2I src));
   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode( pshufd(dst, src, 0x00));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_eRegI(regXD dst, eRegI src) %{
+instruct Repl2I_eRegI(regD dst, eRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2I src));
   format %{ "MOVD   $dst,$src\n\t"
             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
-// Replicate scalar zero to packed integer (2 byte) values in xmm
+// Replicate scalar zero to packed integer (4 byte) values in xmm
-instruct Repl2I_immI0(regXD dst, immI0 zero) %{
+instruct Repl2I_immI0(regD dst, immI0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2I zero));
   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regXD dst, regXD src) %{
+instruct Repl2F_reg(regD dst, regD src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regX(regXD dst, regX src) %{
+instruct Repl2F_regF(regD dst, regF src) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
-// Replicate scalar to packed single precision floating point values in xmm
+// Replicate scalar zero to packed single precision floating point values in xmm
-instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
+instruct Repl2F_immF0(regD dst, immF0 zero) %{
   predicate(UseSSE>=2);
   match(Set dst (Replicate2F zero));
   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -12702,7 +11972,7 @@
 %}
 
 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
-                        eAXRegI result, regXD tmp1, eFlagsReg cr) %{
+                        eAXRegI result, regD tmp1, eFlagsReg cr) %{
   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
 
@@ -12717,7 +11987,7 @@
 
 // fast string equals
 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
-                       regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
+                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
   match(Set result (StrEquals (Binary str1 str2) cnt));
   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
 
@@ -12732,7 +12002,7 @@
 
 // fast search of substring with known size.
 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
-                            eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
+                            eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
   predicate(UseSSE42Intrinsics);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
@@ -12759,7 +12029,7 @@
 %}
 
 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
-                        eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
+                        eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
   predicate(UseSSE42Intrinsics);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
@@ -12776,7 +12046,7 @@
 
 // fast array equals
 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
-                      regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
+                      regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
 %{
   match(Set result (AryEq ary1 ary2));
   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
@@ -13602,27 +12872,36 @@
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovD_regS(cmp,flags,dst,src);
+    fcmovDPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
+instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovXD_regS(cmp,flags,dst,src);
+    fcmovD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{  // long compare (LT/GE flags) driving a CMoveF; UseSSE==0 variant
+  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );  // parens required: && binds tighter than ||, otherwise 'ge' matches for any UseSSE
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovFPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
-  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
@@ -13630,15 +12909,6 @@
   %}
 %}
 
-instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
-  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovX_regS(cmp,flags,dst,src);
-  %}
-%}
-
 //======
 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
@@ -13730,27 +13000,36 @@
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovD_regS(cmp,flags,dst,src);
+    fcmovDPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
+instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovXD_regS(cmp,flags,dst,src);
+    fcmovD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{  // long compare (EQ/NE flags) driving a CMoveF; UseSSE==0 variant
+  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );  // parens required: && binds tighter than ||, otherwise 'ne' matches for any UseSSE
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovFPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
-  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
+  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
@@ -13758,15 +13037,6 @@
   %}
 %}
 
-instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
-  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovX_regS(cmp,flags,dst,src);
-  %}
-%}
-
 //======
 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
 // Same as cmpL_reg_flags_LEGT except must negate src
@@ -13863,27 +13133,37 @@
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovD_regS(cmp,flags,dst,src);
+    fcmovDPR_regS(cmp,flags,dst,src);
   %}
 %}
 
 // Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
+instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
-    fcmovXD_regS(cmp,flags,dst,src);
-  %}
-%}
+    fcmovD_regS(cmp,flags,dst,src);
+  %}
+%}
+
+instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{  // long compare (LE/GT flags, commuted cmpOp) driving a CMoveF; UseSSE==0 variant
+  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );  // parens required: && binds tighter than ||, otherwise 'gt' matches for any UseSSE
+  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
+  ins_cost(200);
+  expand %{
+    fcmovFPR_regS(cmp,flags,dst,src);
+  %}
+%}
+
 
 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
-  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
+  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
   expand %{
@@ -13892,16 +13172,6 @@
 %}
 
 
-instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
-  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovX_regS(cmp,flags,dst,src);
-  %}
-%}
-
-
 // ============================================================================
 // Procedure Call/Return Instructions
 // Call Java Static Instruction
@@ -14076,20 +13346,20 @@
 // inlined locking and unlocking
 
 
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
+instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
   match( Set cr (FastLock object box) );
-  effect( TEMP tmp, TEMP scr );
+  effect( TEMP tmp, TEMP scr, USE_KILL box );
   ins_cost(300);
-  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
+  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
   ins_encode( Fast_Lock(object,box,tmp,scr) );
   ins_pipe( pipe_slow );
 %}
 
 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
   match( Set cr (FastUnlock object box) );
-  effect( TEMP tmp );
+  effect( TEMP tmp, USE_KILL box );
   ins_cost(300);
-  format %{ "FASTUNLOCK $object, $box, $tmp" %}
+  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
   ins_encode( Fast_Unlock(object,box,tmp) );
   ins_pipe( pipe_slow );
 %}
@@ -14117,6 +13387,25 @@
   ins_pipe( ialu_reg_mem );
 %}
 
+
+// ============================================================================
+// The instruct name 'tlsLoadP' is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this instruction.
+instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
+  match(Set dst (ThreadLocal));
+  effect(DEF dst, KILL cr);  // dst is only defined; the condition codes are declared killed
+
+  format %{ "MOV    $dst, Thread::current()" %}
+  ins_encode %{
+    Register dstReg = as_Register($dst$$reg);
+    __ get_thread(dstReg);  // per the format string, leaves the current thread pointer in $dst
+  %}
+  ins_pipe( ialu_reg_fat );
+%}
+
+
+
 //----------PEEPHOLE RULES-----------------------------------------------------
 // These must follow all instruction definitions as they use the names
 // defined in the instructions definitions.
--- a/src/cpu/x86/vm/x86_64.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/x86/vm/x86_64.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -552,7 +552,7 @@
 #define __ _masm.
 
 static int preserve_SP_size() {
-  return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
+  return 3;  // one each for: rex.w prefix, opcode, rm(reg/reg); the rex.w is now unconditional
 }
 
 // !!!!! Special hack to get all types of calls to specify the byte offset
@@ -610,13 +610,6 @@
   return round_to(current_offset, alignment_required()) - current_offset;
 }
 
-#ifndef PRODUCT
-void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const
-{
-  st->print("INT3");
-}
-#endif
-
 // EMIT_RM()
 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
   unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
@@ -797,48 +790,35 @@
   }
 }
 
-void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
-{
-  if (dstenc != srcenc) {
-    if (dstenc < 8) {
-      if (srcenc >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-        srcenc -= 8;
-      }
-    } else {
-      if (srcenc < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-        srcenc -= 8;
-      }
-      dstenc -= 8;
-    }
-
-    emit_opcode(cbuf, 0x8B);
-    emit_rm(cbuf, 0x3, dstenc, srcenc);
-  }
-}
-
-void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
-  if( dst_encoding == src_encoding ) {
-    // reg-reg copy, use an empty encoding
-  } else {
-    MacroAssembler _masm(&cbuf);
-
-    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
-  }
-}
-
 // This could be in MacroAssembler but it's fairly C2 specific
 void emit_cmpfp_fixup(MacroAssembler& _masm) {
   Label exit;
   __ jccb(Assembler::noParity, exit);
   __ pushf();
+  //
+  // comiss/ucomiss instructions set ZF,PF,CF flags and
+  // zero OF,AF,SF for NaN values.
+  // Fixup flags by zeroing ZF,PF so that compare of NaN
+  // values returns 'less than' result (CF is set).
+  // The mask also clears SF,AF (already zero, see above); all other bits are kept.
+  //
+  //    7 6 5 4 3 2 1 0
+  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
+  //    0 0 1 0 1 0 1 1   (0x2B)
+  //
   __ andq(Address(rsp, 0), 0xffffff2b);
   __ popf();
   __ bind(exit);
-  __ nop(); // (target for branch to avoid branch to branch)
+}
+
+void emit_cmpfp3(MacroAssembler& _masm, Register dst) {  // turns the current flags into -1, 0 or 1 in dst; presumably run right after an FP compare
+  Label done;
+  __ movl(dst, -1);                    // default result: -1
+  __ jcc(Assembler::parity, done);     // PF set (NaN operand, per the flag notes in emit_cmpfp_fixup): keep -1
+  __ jcc(Assembler::below, done);      // CF set ('less than'): keep -1
+  __ setb(Assembler::notEqual, dst);   // low byte of dst = 1 if not equal, 0 if equal
+  __ movzbl(dst, dst);                 // zero-extend that byte to the full 32-bit result
+  __ bind(done);
 }
 
 
@@ -866,121 +846,53 @@
 
 //=============================================================================
 #ifndef PRODUCT
-void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const
-{
+void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
   Compile* C = ra_->C;
 
   int framesize = C->frame_slots() << LogBytesPerInt;
   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove wordSize for return adr already pushed
-  // and another for the RBP we are going to save
-  framesize -= 2*wordSize;
-  bool need_nop = true;
-
-  // Calls to C2R adapters often do not accept exceptional returns.
-  // We require that their callers must bang for them.  But be
-  // careful, because some VM calls (such as call site linkage) can
-  // use several kilobytes of stack.  But the stack safety zone should
-  // account for that.  See bugs 4446381, 4468289, 4497237.
+  // Remove wordSize for return addr which is already pushed.
+  framesize -= wordSize;
+
   if (C->need_stack_bang(framesize)) {
-    st->print_cr("# stack bang"); st->print("\t");
-    need_nop = false;
+    framesize -= wordSize;  // the "pushq rbp" printed below takes one word of the frame
+    st->print("# stack bang");
+    st->print("\n\t");
+    st->print("pushq   rbp\t# Save rbp");
+    if (framesize) {
+      st->print("\n\t");
+      st->print("subq    rsp, #%d\t# Create frame",framesize);
+    }
+  } else {
+    st->print("subq    rsp, #%d\t# Create frame",framesize);
+    st->print("\n\t");
+    framesize -= wordSize;  // now the offset of the top word of the new frame, where rbp is stored
+    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
   }
-  st->print_cr("pushq   rbp"); st->print("\t");
 
   if (VerifyStackAtCalls) {
-    // Majik cookie to verify stack depth
-    st->print_cr("pushq   0xffffffffbadb100d"
-                  "\t# Majik cookie for stack depth check");
-    st->print("\t");
-    framesize -= wordSize; // Remove 2 for cookie
-    need_nop = false;
+    st->print("\n\t");
+    framesize -= wordSize;  // step down one more word: the offset printed for the cookie store
+    st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
+#ifdef ASSERT
+    st->print("\n\t");
+    st->print("# stack alignment check");
+#endif
   }
-
-  if (framesize) {
-    st->print("subq    rsp, #%d\t# Create frame", framesize);
-    if (framesize < 0x80 && need_nop) {
-      st->print("\n\tnop\t# nop for patch_verified_entry");
-    }
-  }
+  st->cr();
 }
 #endif
 
-void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
-{
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
   Compile* C = ra_->C;
-
-  // WARNING: Initial instruction MUST be 5 bytes or longer so that
-  // NativeJump::patch_verified_entry will be able to patch out the entry
-  // code safely. The fldcw is ok at 6 bytes, the push to verify stack
-  // depth is ok at 5 bytes, the frame allocation can be either 3 or
-  // 6 bytes. So if we don't do the fldcw or the push then we must
-  // use the 6 byte frame allocation even if we have no frame. :-(
-  // If method sets FPU control word do it now
+  MacroAssembler _masm(&cbuf);
 
   int framesize = C->frame_slots() << LogBytesPerInt;
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove wordSize for return adr already pushed
-  // and another for the RBP we are going to save
-  framesize -= 2*wordSize;
-  bool need_nop = true;
-
-  // Calls to C2R adapters often do not accept exceptional returns.
-  // We require that their callers must bang for them.  But be
-  // careful, because some VM calls (such as call site linkage) can
-  // use several kilobytes of stack.  But the stack safety zone should
-  // account for that.  See bugs 4446381, 4468289, 4497237.
-  if (C->need_stack_bang(framesize)) {
-    MacroAssembler masm(&cbuf);
-    masm.generate_stack_overflow_check(framesize);
-    need_nop = false;
-  }
-
-  // We always push rbp so that on return to interpreter rbp will be
-  // restored correctly and we can correct the stack.
-  emit_opcode(cbuf, 0x50 | RBP_enc);
-
-  if (VerifyStackAtCalls) {
-    // Majik cookie to verify stack depth
-    emit_opcode(cbuf, 0x68); // pushq (sign-extended) 0xbadb100d
-    emit_d32(cbuf, 0xbadb100d);
-    framesize -= wordSize; // Remove 2 for cookie
-    need_nop = false;
-  }
-
-  if (framesize) {
-    emit_opcode(cbuf, Assembler::REX_W);
-    if (framesize < 0x80) {
-      emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
-      emit_rm(cbuf, 0x3, 0x05, RSP_enc);
-      emit_d8(cbuf, framesize);
-      if (need_nop) {
-        emit_opcode(cbuf, 0x90); // nop
-      }
-    } else {
-      emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
-      emit_rm(cbuf, 0x3, 0x05, RSP_enc);
-      emit_d32(cbuf, framesize);
-    }
-  }
+
+  __ verified_entry(framesize, C->need_stack_bang(framesize), false);
 
   C->set_frame_complete(cbuf.insts_size());
 
-#ifdef ASSERT
-  if (VerifyStackAtCalls) {
-    Label L;
-    MacroAssembler masm(&cbuf);
-    masm.push(rax);
-    masm.mov(rax, rsp);
-    masm.andptr(rax, StackAlignmentInBytes-1);
-    masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
-    masm.pop(rax);
-    masm.jcc(Assembler::equal, L);
-    masm.stop("Stack is not properly aligned!");
-    masm.bind(L);
-  }
-#endif
-
   if (C->has_mach_constant_base_node()) {
     // NOTE: We set the table base offset here because users might be
     // emitted before MachConstantBaseNode.
@@ -1274,16 +1186,8 @@
         // 64-bit
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-          if (Matcher::_regEncode[dst_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("%s  %s, [rsp + #%d]\t# spill",
@@ -1294,25 +1198,17 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[dst_first] >= 8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xF3);
-          if (Matcher::_regEncode[dst_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x10);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movss   %s, [rsp + #%d]\t# spill",
@@ -1322,9 +1218,9 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[dst_first] >= 8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       }
     }
   } else if (src_first_rc == rc_int) {
@@ -1450,25 +1346,8 @@
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_W);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WB);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_WR);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WRB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x6E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdq   %s, %s\t# spill",
@@ -1482,23 +1361,8 @@
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x6E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdl   %s, %s\t# spill",
@@ -1507,9 +1371,9 @@
 #endif
         }
         return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 4
-          : 5; // REX
+          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+          ? 5
+          : (4 + ((UseAVX>0)?1:0)); // REX
       }
     }
   } else if (src_first_rc == rc_float) {
@@ -1521,16 +1385,8 @@
         // 64-bit
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xF2);
-          if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x11);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movsd   [rsp + #%d], %s\t# spill",
@@ -1540,25 +1396,17 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[src_first] >= 8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xF3);
-          if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x11);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movss   [rsp + #%d], %s\t# spill",
@@ -1568,9 +1416,9 @@
         }
         return
           ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] < 8)
-           ? 5
-           : 6); // REX
+          ((Matcher::_regEncode[src_first] >=8)
+           ? 6
+           : (5 + ((UseAVX>0)?1:0))); // REX
       }
     } else if (dst_first_rc == rc_int) {
       // xmm -> gpr
@@ -1578,25 +1426,8 @@
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_W);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WR); // attention!
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_WB); // attention!
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WRB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x7E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[src_first] & 7,
-                  Matcher::_regEncode[dst_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdq   %s, %s\t# spill",
@@ -1610,23 +1441,8 @@
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          emit_opcode(*cbuf, 0x66);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_R); // attention!
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_B); // attention!
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, 0x7E);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[src_first] & 7,
-                  Matcher::_regEncode[dst_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("movdl   %s, %s\t# spill",
@@ -1635,9 +1451,9 @@
 #endif
         }
         return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 4
-          : 5; // REX
+          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+          ? 5
+          : (4 + ((UseAVX>0)?1:0)); // REX
       }
     } else if (dst_first_rc == rc_float) {
       // xmm -> xmm
@@ -1645,23 +1461,8 @@
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("%s  %s, %s\t# spill",
@@ -1671,32 +1472,16 @@
 #endif
         }
         return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 4
-          : 5; // REX
+          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+          ? 5
+          : (4 + ((UseAVX>0)?1:0)); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          if (!UseXmmRegToRegMoveAll)
-            emit_opcode(*cbuf, 0xF3);
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x0F);
-          emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
         } else if (!do_size) {
           st->print("%s  %s, %s\t# spill",
@@ -1705,10 +1490,10 @@
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? (UseXmmRegToRegMoveAll ? 3 : 4)
-          : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
+        return ((UseAVX>0) ? 5:
+          ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+           ? (UseXmmRegToRegMoveAll ? 4 : 5)
+           : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
       }
     }
   }
@@ -1738,26 +1523,6 @@
 
 //=============================================================================
 #ifndef PRODUCT
-void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const
-{
-  st->print("nop \t# %d bytes pad for loops and calls", _count);
-}
-#endif
-
-void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const
-{
-  MacroAssembler _masm(&cbuf);
-  __ nop(_count);
-}
-
-uint MachNopNode::size(PhaseRegAlloc*) const
-{
-  return _count;
-}
-
-
-//=============================================================================
-#ifndef PRODUCT
 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 {
   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
@@ -2205,47 +1970,6 @@
     emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
   %}
 
-  enc_class cmpfp_fixup() %{
-      MacroAssembler _masm(&cbuf);
-      emit_cmpfp_fixup(_masm);
-  %}
-
-  enc_class cmpfp3(rRegI dst)
-  %{
-    int dstenc = $dst$$reg;
-
-    // movl $dst, -1
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
-    emit_d32(cbuf, -1);
-
-    // jp,s done
-    emit_opcode(cbuf, 0x7A);
-    emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
-
-    // jb,s done
-    emit_opcode(cbuf, 0x72);
-    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
-
-    // setne $dst
-    if (dstenc >= 4) {
-      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x95);
-    emit_opcode(cbuf, 0xC0 | (dstenc & 7));
-
-    // movzbl $dst, $dst
-    if (dstenc >= 4) {
-      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0xB6);
-    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
-  %}
-
   enc_class cdql_enc(no_rax_rdx_RegI div)
   %{
     // Full implementation of Java idiv and irem; checks for
@@ -2472,55 +2196,6 @@
     emit_cc(cbuf, $secondary, $cop$$cmpcode);
   %}
 
-  enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
-  %{
-    // Invert sense of branch from sense of cmov
-    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
-    emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
-                  ? (UseXmmRegToRegMoveAll ? 3 : 4)
-                  : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
-    // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
-    if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
-    if ($dst$$reg < 8) {
-      if ($src$$reg >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-      }
-    } else {
-      if ($src$$reg < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-      }
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
-  %}
-
-  enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
-  %{
-    // Invert sense of branch from sense of cmov
-    emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
-    emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
-
-    //  UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
-    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
-    if ($dst$$reg < 8) {
-      if ($src$$reg >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-      }
-    } else {
-      if ($src$$reg < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-      }
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
-    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
-  %}
-
   enc_class enc_PartialSubtypeCheck()
   %{
     Register Rrdi = as_Register(RDI_enc); // result register
@@ -2553,21 +2228,6 @@
                    RELOC_DISP32);
   %}
 
-  enc_class preserve_SP %{
-    debug_only(int off0 = cbuf.insts_size());
-    MacroAssembler _masm(&cbuf);
-    // RBP is preserved across all calls, even compiled calls.
-    // Use it to preserve RSP in places where the callee might change the SP.
-    __ movptr(rbp_mh_SP_save, rsp);
-    debug_only(int off1 = cbuf.insts_size());
-    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
-  %}
-
-  enc_class restore_SP %{
-    MacroAssembler _masm(&cbuf);
-    __ movptr(rsp, rbp_mh_SP_save);
-  %}
-
   enc_class Java_Static_Call(method meth)
   %{
     // JAVA STATIC CALL
@@ -2751,68 +2411,6 @@
     }
   %}
 
-  // Encode a reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_copy(rRegI dst, rRegI src)
-  %{
-    encode_copy(cbuf, $dst$$reg, $src$$reg);
-  %}
-
-  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_CopyXD( RegD dst, RegD src ) %{
-    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
-  %}
-
-  enc_class enc_copy_always(rRegI dst, rRegI src)
-  %{
-    int srcenc = $src$$reg;
-    int dstenc = $dst$$reg;
-
-    if (dstenc < 8) {
-      if (srcenc >= 8) {
-        emit_opcode(cbuf, Assembler::REX_B);
-        srcenc -= 8;
-      }
-    } else {
-      if (srcenc < 8) {
-        emit_opcode(cbuf, Assembler::REX_R);
-      } else {
-        emit_opcode(cbuf, Assembler::REX_RB);
-        srcenc -= 8;
-      }
-      dstenc -= 8;
-    }
-
-    emit_opcode(cbuf, 0x8B);
-    emit_rm(cbuf, 0x3, dstenc, srcenc);
-  %}
-
-  enc_class enc_copy_wide(rRegL dst, rRegL src)
-  %{
-    int srcenc = $src$$reg;
-    int dstenc = $dst$$reg;
-
-    if (dstenc != srcenc) {
-      if (dstenc < 8) {
-        if (srcenc < 8) {
-          emit_opcode(cbuf, Assembler::REX_W);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_WB);
-          srcenc -= 8;
-        }
-      } else {
-        if (srcenc < 8) {
-          emit_opcode(cbuf, Assembler::REX_WR);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_WRB);
-          srcenc -= 8;
-        }
-        dstenc -= 8;
-      }
-      emit_opcode(cbuf, 0x8B);
-      emit_rm(cbuf, 0x3, dstenc, srcenc);
-    }
-  %}
-
   enc_class Con32(immI src)
   %{
     // Output immediate
@@ -3212,92 +2810,19 @@
   %}
 
   enc_class Push_ResultXD(regD dst) %{
-    int dstenc = $dst$$reg;
-
-    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
-
-    // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode  (cbuf, 0x0F );
-    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
-    encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
-
-    // add rsp,8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf,0x83);
-    emit_rm(cbuf,0x3, 0x0, RSP_enc);
-    emit_d8(cbuf,0x08);
+    MacroAssembler _masm(&cbuf);
+    __ fstp_d(Address(rsp, 0));
+    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+    __ addptr(rsp, 8);
   %}
 
   enc_class Push_SrcXD(regD src) %{
-    int srcenc = $src$$reg;
-
-    // subq rsp,#8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 0x8);
-
-    // movsd [rsp],src
-    emit_opcode(cbuf, 0xF2);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
-
-    // fldd [rsp]
-    emit_opcode(cbuf, 0x66);
-    emit_opcode(cbuf, 0xDD);
-    encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
-  %}
-
-
-  enc_class movq_ld(regD dst, memory mem) %{
     MacroAssembler _masm(&cbuf);
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-
-  enc_class movq_st(memory mem, regD src) %{
-    MacroAssembler _masm(&cbuf);
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-
-  enc_class pshufd_8x8(regF dst, regF src) %{
-    MacroAssembler _masm(&cbuf);
-
-    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
-    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
-  %}
-
-  enc_class pshufd_4x16(regF dst, regF src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
-  %}
-
-  enc_class pshufd(regD dst, regD src, int mode) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
-  %}
-
-  enc_class pxor(regD dst, regD src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
-  %}
-
-  enc_class mov_i2x(regD dst, rRegI src) %{
-    MacroAssembler _masm(&cbuf);
-
-    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
-  %}
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ fld_d(Address(rsp, 0));
+  %}
+
 
   // obj: object to lock
   // box: box address (header location) -- killed
@@ -3534,303 +3059,6 @@
                    RELOC_DISP32);
   %}
 
-  enc_class absF_encoding(regF dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signmask_address = (address) StubRoutines::x86::float_sign_mask();
-
-    cbuf.set_insts_mark();
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signmask_address);
-  %}
-
-  enc_class absD_encoding(regD dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signmask_address = (address) StubRoutines::x86::double_sign_mask();
-
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0x66);
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x54);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signmask_address);
-  %}
-
-  enc_class negF_encoding(regF dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signflip_address = (address) StubRoutines::x86::float_sign_flip();
-
-    cbuf.set_insts_mark();
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signflip_address);
-  %}
-
-  enc_class negD_encoding(regD dst)
-  %{
-    int dstenc = $dst$$reg;
-    address signflip_address = (address) StubRoutines::x86::double_sign_flip();
-
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0x66);
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-      dstenc -= 8;
-    }
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x57);
-    emit_rm(cbuf, 0x0, dstenc, 0x5);  // 00 reg 101
-    emit_d32_reloc(cbuf, signflip_address);
-  %}
-
-  enc_class f2i_fixup(rRegI dst, regF src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-
-    // cmpl $dst, #0x80000000
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x81);
-    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
-    emit_d32(cbuf, 0x80000000);
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movss [rsp], $src
-    emit_opcode(cbuf, 0xF3);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call f2i_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
-
-  enc_class f2l_fixup(rRegL dst, regF src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-    address const_address = (address) StubRoutines::x86::double_sign_flip();
-
-    // cmpq $dst, [0x8000000000000000]
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
-    emit_opcode(cbuf, 0x39);
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
-    emit_d32_reloc(cbuf, const_address);
-
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movss [rsp], $src
-    emit_opcode(cbuf, 0xF3);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call f2l_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
-
-  enc_class d2i_fixup(rRegI dst, regD src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-
-    // cmpl $dst, #0x80000000
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x81);
-    emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
-    emit_d32(cbuf, 0x80000000);
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movsd [rsp], $src
-    emit_opcode(cbuf, 0xF2);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call d2i_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
-
-  enc_class d2l_fixup(rRegL dst, regD src)
-  %{
-    int dstenc = $dst$$reg;
-    int srcenc = $src$$reg;
-    address const_address = (address) StubRoutines::x86::double_sign_flip();
-
-    // cmpq $dst, [0x8000000000000000]
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
-    emit_opcode(cbuf, 0x39);
-    // XXX reg_mem doesn't support RIP-relative addressing yet
-    emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
-    emit_d32_reloc(cbuf, const_address);
-
-
-    // jne,s done
-    emit_opcode(cbuf, 0x75);
-    if (srcenc < 8 && dstenc < 8) {
-      emit_d8(cbuf, 0xF);
-    } else if (srcenc >= 8 && dstenc >= 8) {
-      emit_d8(cbuf, 0x11);
-    } else {
-      emit_d8(cbuf, 0x10);
-    }
-
-    // subq rsp, #8
-    emit_opcode(cbuf, Assembler::REX_W);
-    emit_opcode(cbuf, 0x83);
-    emit_rm(cbuf, 0x3, 0x5, RSP_enc);
-    emit_d8(cbuf, 8);
-
-    // movsd [rsp], $src
-    emit_opcode(cbuf, 0xF2);
-    if (srcenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_R);
-    }
-    emit_opcode(cbuf, 0x0F);
-    emit_opcode(cbuf, 0x11);
-    encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
-    // call d2l_fixup
-    cbuf.set_insts_mark();
-    emit_opcode(cbuf, 0xE8);
-    emit_d32_reloc(cbuf,
-                   (int)
-                   (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
-                   runtime_call_Relocation::spec(),
-                   RELOC_DISP32);
-
-    // popq $dst
-    if (dstenc >= 8) {
-      emit_opcode(cbuf, Assembler::REX_B);
-    }
-    emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
-    // done:
-  %}
 %}
 
 
@@ -3938,9 +3166,9 @@
   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
   // Otherwise, it is above the locks and verification slot and alignment word
   return_addr(STACK - 2 +
-              round_to(2 + 2 * VerifyStackAtCalls +
-                       Compile::current()->fixed_slots(),
-                       WordsPerLong * 2));
+              round_to((Compile::current()->in_preserve_stack_slots() +
+                        Compile::current()->fixed_slots()),
+                       stack_alignment_in_slots()));
 
   // Body of function which returns an integer array locating
   // arguments either in registers or in stack slots.  Passed an array
@@ -6156,8 +5384,9 @@
 
   ins_cost(145); // XXX
   format %{ "movss   $dst, $mem\t# float" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6169,8 +5398,9 @@
 
   ins_cost(145); // XXX
   format %{ "movlpd  $dst, $mem\t# double" %}
-  opcode(0x66, 0x0F, 0x12);
-  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6181,8 +5411,9 @@
 
   ins_cost(145); // XXX
   format %{ "movsd   $dst, $mem\t# double" %}
-  opcode(0xF2, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6191,7 +5422,9 @@
   match(Set dst (Load8B mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6200,7 +5433,9 @@
   match(Set dst (Load4S mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6209,7 +5444,9 @@
   match(Set dst (Load4C mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6218,16 +5455,20 @@
   match(Set dst (Load2I mem));
   ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
 // Load Aligned Packed Single to XMM
 instruct loadA2F(regD dst, memory mem) %{
   match(Set dst (Load2F mem));
-  ins_cost(145);
+  ins_cost(125);
   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
-  ins_encode( movq_ld(dst, mem));
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6540,8 +5781,9 @@
   ins_cost(100);
 
   format %{ "xorps   $dst, $dst\t# float 0.0" %}
-  opcode(0x0F, 0x57);
-  ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -6562,8 +5804,9 @@
   ins_cost(100);
 
   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
-  opcode(0x66, 0x0F, 0x57);
-  ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
+  ins_encode %{
+    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -6606,8 +5849,9 @@
 
   ins_cost(125);
   format %{ "movss   $dst, $src\t# float stk" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -6972,7 +6216,9 @@
   match(Set mem (Store8B mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed8B" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6981,7 +6227,9 @@
   match(Set mem (Store4C mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed4C" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -6990,7 +6238,9 @@
   match(Set mem (Store2I mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2I" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7024,7 +6274,9 @@
   match(Set mem (Store2F mem src));
   ins_cost(145);
   format %{ "MOVQ  $mem,$src\t! packed2F" %}
-  ins_encode( movq_st(mem, src));
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -7035,8 +6287,9 @@
 
   ins_cost(95); // XXX
   format %{ "movss   $mem, $src\t# float" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+  ins_encode %{
+    __ movflt($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7072,8 +6325,9 @@
 
   ins_cost(95); // XXX
   format %{ "movsd   $mem, $src\t# double" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+  ins_encode %{
+    __ movdbl($mem$$Address, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7142,8 +6396,9 @@
 
   ins_cost(95); // XXX
   format %{ "movss   $dst, $src\t# float stk" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7153,8 +6408,9 @@
 
   ins_cost(95); // XXX
   format %{ "movsd   $dst, $src\t# double stk" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -7444,6 +6700,16 @@
   ins_pipe(empty);
 %}
 
+instruct membar_storestore() %{
+  match(MemBarStoreStore);
+  ins_cost(0);
+
+  size(0);
+  format %{ "MEMBAR-storestore (empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
+%}
+
 //----------Move Instructions--------------------------------------------------
 
 instruct castX2P(rRegP dst, rRegL src)
@@ -7451,7 +6717,11 @@
   match(Set dst (CastX2P src));
 
   format %{ "movq    $dst, $src\t# long->ptr" %}
-  ins_encode(enc_copy_wide(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movptr($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
@@ -7460,7 +6730,11 @@
   match(Set dst (CastP2X src));
 
   format %{ "movq    $dst, $src\t# ptr -> long" %}
-  ins_encode(enc_copy_wide(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movptr($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
@@ -7813,7 +7087,13 @@
   format %{ "jn$cop    skip\t# signed cmove float\n\t"
             "movss     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovf_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -7837,7 +7117,13 @@
   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
             "movss     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovf_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -7857,7 +7143,13 @@
   format %{ "jn$cop    skip\t# signed cmove double\n\t"
             "movsd     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovd_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -7869,7 +7161,13 @@
   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
             "movsd     $dst, $src\n"
     "skip:" %}
-  ins_encode(enc_cmovd_branch(cop, dst, src));
+  ins_encode %{
+    Label Lskip;
+    // Invert sense of branch from sense of CMOV
+    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+    __ bind(Lskip);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10191,17 +9489,18 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
   match(Set cr (CmpF src1 src2));
 
-  ins_cost(145);
+  ins_cost(100);
   format %{ "ucomiss $src1, $src2" %}
   ins_encode %{
     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
@@ -10219,10 +9518,11 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10231,8 +9531,9 @@
 
   ins_cost(100);
   format %{ "ucomiss $src1, $src2" %}
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10245,7 +9546,7 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
+    "exit:" %}
   ins_encode %{
     __ ucomiss($src$$XMMRegister, $constantaddress($con));
     emit_cmpfp_fixup(_masm);
@@ -10273,10 +9574,11 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10301,10 +9603,11 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
-             cmpfp_fixup);
+    "exit:" %}
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp_fixup(_masm);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10313,8 +9616,9 @@
 
   ins_cost(100);
   format %{ "ucomisd $src1, $src2" %}
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10327,7 +9631,7 @@
             "pushfq\t# saw NaN, set CF\n\t"
             "andq    [rsp], #0xffffff2b\n\t"
             "popfq\n"
-    "exit:   nop\t# avoid branch to branch" %}
+    "exit:" %}
   ins_encode %{
     __ ucomisd($src$$XMMRegister, $constantaddress($con));
     emit_cmpfp_fixup(_masm);
@@ -10359,10 +9663,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10380,10 +9684,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x0F, 0x2E);
-  ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomiss($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10401,15 +9705,8 @@
             "movzbl  $dst, $dst\n"
     "done:" %}
   ins_encode %{
-    Label L_done;
-    Register Rdst = $dst$$Register;
     __ ucomiss($src$$XMMRegister, $constantaddress($con));
-    __ movl(Rdst, -1);
-    __ jcc(Assembler::parity, L_done);
-    __ jcc(Assembler::below, L_done);
-    __ setb(Assembler::notEqual, Rdst);
-    __ movzbl(Rdst, Rdst);
-    __ bind(L_done);
+    emit_cmpfp3(_masm, $dst$$Register);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -10428,10 +9725,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10449,10 +9746,10 @@
             "setne   $dst\n\t"
             "movzbl  $dst, $dst\n"
     "done:" %}
-
-  opcode(0x66, 0x0F, 0x2E);
-  ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
-             cmpfp3(dst));
+  ins_encode %{
+    __ ucomisd($src1$$XMMRegister, $src2$$Address);
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10470,374 +9767,9 @@
             "movzbl  $dst, $dst\n"
     "done:" %}
   ins_encode %{
-    Register Rdst = $dst$$Register;
-    Label L_done;
     __ ucomisd($src$$XMMRegister, $constantaddress($con));
-    __ movl(Rdst, -1);
-    __ jcc(Assembler::parity, L_done);
-    __ jcc(Assembler::below, L_done);
-    __ setb(Assembler::notEqual, Rdst);
-    __ movzbl(Rdst, Rdst);
-    __ bind(L_done);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct addF_reg(regF dst, regF src)
-%{
-  match(Set dst (AddF dst src));
-
-  format %{ "addss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct addF_mem(regF dst, memory src)
-%{
-  match(Set dst (AddF dst (LoadF src)));
-
-  format %{ "addss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct addF_imm(regF dst, immF con) %{
-  match(Set dst (AddF dst con));
-  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ addss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct addD_reg(regD dst, regD src)
-%{
-  match(Set dst (AddD dst src));
-
-  format %{ "addsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct addD_mem(regD dst, memory src)
-%{
-  match(Set dst (AddD dst (LoadD src)));
-
-  format %{ "addsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x58);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct addD_imm(regD dst, immD con) %{
-  match(Set dst (AddD dst con));
-  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ addsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct subF_reg(regF dst, regF src)
-%{
-  match(Set dst (SubF dst src));
-
-  format %{ "subss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct subF_mem(regF dst, memory src)
-%{
-  match(Set dst (SubF dst (LoadF src)));
-
-  format %{ "subss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct subF_imm(regF dst, immF con) %{
-  match(Set dst (SubF dst con));
-  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ subss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct subD_reg(regD dst, regD src)
-%{
-  match(Set dst (SubD dst src));
-
-  format %{ "subsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct subD_mem(regD dst, memory src)
-%{
-  match(Set dst (SubD dst (LoadD src)));
-
-  format %{ "subsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5C);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct subD_imm(regD dst, immD con) %{
-  match(Set dst (SubD dst con));
-  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ subsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulF_reg(regF dst, regF src)
-%{
-  match(Set dst (MulF dst src));
-
-  format %{ "mulss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulF_mem(regF dst, memory src)
-%{
-  match(Set dst (MulF dst (LoadF src)));
-
-  format %{ "mulss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulF_imm(regF dst, immF con) %{
-  match(Set dst (MulF dst con));
-  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ mulss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulD_reg(regD dst, regD src)
-%{
-  match(Set dst (MulD dst src));
-
-  format %{ "mulsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulD_mem(regD dst, memory src)
-%{
-  match(Set dst (MulD dst (LoadD src)));
-
-  format %{ "mulsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x59);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct mulD_imm(regD dst, immD con) %{
-  match(Set dst (MulD dst con));
-  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ mulsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct divF_reg(regF dst, regF src)
-%{
-  match(Set dst (DivF dst src));
-
-  format %{ "divss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct divF_mem(regF dst, memory src)
-%{
-  match(Set dst (DivF dst (LoadF src)));
-
-  format %{ "divss   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct divF_imm(regF dst, immF con) %{
-  match(Set dst (DivF dst con));
-  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ divss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct divD_reg(regD dst, regD src)
-%{
-  match(Set dst (DivD dst src));
-
-  format %{ "divsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct divD_mem(regD dst, memory src)
-%{
-  match(Set dst (DivD dst (LoadD src)));
-
-  format %{ "divsd   $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x5E);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct divD_imm(regD dst, immD con) %{
-  match(Set dst (DivD dst con));
-  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ divsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_reg(regF dst, regF src)
-%{
-  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
-
-  format %{ "sqrtss  $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_mem(regF dst, memory src)
-%{
-  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
-
-  format %{ "sqrtss  $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF3, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_imm(regF dst, immF con) %{
-  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
-  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_reg(regD dst, regD src)
-%{
-  match(Set dst (SqrtD src));
-
-  format %{ "sqrtsd  $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_mem(regD dst, memory src)
-%{
-  match(Set dst (SqrtD (LoadD src)));
-
-  format %{ "sqrtsd  $dst, $src" %}
-  ins_cost(150); // XXX
-  opcode(0xF2, 0x0F, 0x51);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
-  ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_imm(regD dst, immD con) %{
-  match(Set dst (SqrtD con));
-  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
-  ins_cost(150); // XXX
-  ins_encode %{
-    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct absF_reg(regF dst)
-%{
-  match(Set dst (AbsF dst));
-
-  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
-  ins_encode(absF_encoding(dst));
-  ins_pipe(pipe_slow);
-%}
-
-instruct absD_reg(regD dst)
-%{
-  match(Set dst (AbsD dst));
-
-  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
-            "# abs double by sign masking" %}
-  ins_encode(absD_encoding(dst));
-  ins_pipe(pipe_slow);
-%}
-
-instruct negF_reg(regF dst)
-%{
-  match(Set dst (NegF dst));
-
-  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
-  ins_encode(negF_encoding(dst));
-  ins_pipe(pipe_slow);
-%}
-
-instruct negD_reg(regD dst)
-%{
-  match(Set dst (NegD dst));
-
-  format %{ "xorpd   $dst, [0x8000000000000000]\t"
-            "# neg double by sign flipping" %}
-  ins_encode(negD_encoding(dst));
+    emit_cmpfp3(_masm, $dst$$Register);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10929,8 +9861,9 @@
   match(Set dst (ConvF2D src));
 
   format %{ "cvtss2sd $dst, $src" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10939,8 +9872,9 @@
   match(Set dst (ConvF2D (LoadF src)));
 
   format %{ "cvtss2sd $dst, $src" %}
-  opcode(0xF3, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10949,8 +9883,9 @@
   match(Set dst (ConvD2F src));
 
   format %{ "cvtsd2ss $dst, $src" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10959,8 +9894,9 @@
   match(Set dst (ConvD2F (LoadD src)));
 
   format %{ "cvtsd2ss $dst, $src" %}
-  opcode(0xF2, 0x0F, 0x5A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -10978,9 +9914,17 @@
             "call    f2i_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF3, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             f2i_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -10997,9 +9941,18 @@
             "call    f2l_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF3, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             f2l_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttss2siq($dst$$Register, $src$$XMMRegister);
+    __ cmp64($dst$$Register,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movflt(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11016,9 +9969,17 @@
             "call    d2i_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF2, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             d2i_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+    __ cmpl($dst$$Register, 0x80000000);
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11035,9 +9996,18 @@
             "call    d2l_fixup\n\t"
             "popq    $dst\n"
     "done:   "%}
-  opcode(0xF2, 0x0F, 0x2C);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
-             d2l_fixup(dst, src));
+  ins_encode %{
+    Label done;
+    __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
+    __ cmp64($dst$$Register,
+             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+    __ jccb(Assembler::notEqual, done);
+    __ subptr(rsp, 8);
+    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
+    __ pop($dst$$Register);
+    __ bind(done);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11047,8 +10017,9 @@
   match(Set dst (ConvI2F src));
 
   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11057,8 +10028,9 @@
   match(Set dst (ConvI2F (LoadI src)));
 
   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11068,8 +10040,9 @@
   match(Set dst (ConvI2D src));
 
   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11078,8 +10051,9 @@
   match(Set dst (ConvI2D (LoadI src)));
 
   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11116,8 +10090,9 @@
   match(Set dst (ConvL2F src));
 
   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11126,8 +10101,9 @@
   match(Set dst (ConvL2F (LoadL src)));
 
   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
-  opcode(0xF3, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11136,8 +10112,9 @@
   match(Set dst (ConvL2D src));
 
   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+  ins_encode %{
+    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11146,8 +10123,9 @@
   match(Set dst (ConvL2D (LoadL src)));
 
   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
-  opcode(0xF2, 0x0F, 0x2A);
-  ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
+  %}
   ins_pipe(pipe_slow); // XXX
 %}
 
@@ -11186,7 +10164,11 @@
   match(Set dst (AndL (ConvI2L src) mask));
 
   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
-  ins_encode(enc_copy(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movl($dst$$Register, $src$$Register);
+    }
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -11196,8 +10178,9 @@
   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
 
   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Address);
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -11206,7 +10189,9 @@
   match(Set dst (AndL src mask));
 
   format %{ "movl    $dst, $src\t# zero-extend long" %}
-  ins_encode(enc_copy_always(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -11215,7 +10200,9 @@
   match(Set dst (ConvL2I src));
 
   format %{ "movl    $dst, $src\t# l2i" %}
-  ins_encode(enc_copy_always(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -11226,8 +10213,9 @@
 
   ins_cost(125);
   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movl($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -11237,8 +10225,9 @@
 
   ins_cost(125);
   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
-  opcode(0xF3, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11248,8 +10237,9 @@
 
   ins_cost(125);
   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
+  ins_encode %{
+    __ movq($dst$$Register, Address(rsp, $src$$disp));
+  %}
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -11260,8 +10250,9 @@
 
   ins_cost(125);
   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
-  opcode(0x66, 0x0F, 0x12);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11272,8 +10263,9 @@
 
   ins_cost(125);
   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
-  opcode(0xF2, 0x0F, 0x10);
-  ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11284,8 +10276,9 @@
 
   ins_cost(95); // XXX
   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
-  opcode(0xF3, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11295,8 +10288,9 @@
 
   ins_cost(100);
   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
-  opcode(0x89);
-  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
+  ins_encode %{
+    __ movl(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe( ialu_mem_reg );
 %}
 
@@ -11306,8 +10300,9 @@
 
   ins_cost(95); // XXX
   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
-  opcode(0xF2, 0x0F, 0x11);
-  ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+  ins_encode %{
+    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+  %}
   ins_pipe(pipe_slow);
 %}
 
@@ -11317,8 +10312,9 @@
 
   ins_cost(100);
   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
-  opcode(0x89);
-  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
+  ins_encode %{
+    __ movq(Address(rsp, $dst$$disp), $src$$Register);
+  %}
   ins_pipe(ialu_mem_reg);
 %}
 
@@ -11327,7 +10323,9 @@
   effect(DEF dst, USE src);
   ins_cost(85);
   format %{ "movd    $dst,$src\t# MoveF2I" %}
-  ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
+  ins_encode %{
+    __ movdl($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11336,7 +10334,9 @@
   effect(DEF dst, USE src);
   ins_cost(85);
   format %{ "movd    $dst,$src\t# MoveD2L" %}
-  ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
+  ins_encode %{
+    __ movdq($dst$$Register, $src$$XMMRegister);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11346,7 +10346,9 @@
   effect(DEF dst, USE src);
   ins_cost(300);
   format %{ "movd    $dst,$src\t# MoveI2F" %}
-  ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11355,7 +10357,9 @@
   effect(DEF dst, USE src);
   ins_cost(300);
   format %{ "movd    $dst,$src\t# MoveL2D" %}
-  ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
+  ins_encode %{
+     __ movdq($dst$$XMMRegister, $src$$Register);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11365,7 +10369,13 @@
   format %{ "MOVDQA  $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( pshufd_8x8(dst, src));
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+    }
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11375,7 +10385,11 @@
   format %{ "MOVD    $dst,$src\n\t"
             "PUNPCKLBW $dst,$dst\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( pipe_slow );
 %}
 
@@ -11383,7 +10397,9 @@
 instruct Repl8B_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate8B zero));
   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11391,7 +10407,9 @@
 instruct Repl4S_reg(regD dst, regD src) %{
   match(Set dst (Replicate4S src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11400,7 +10418,10 @@
   match(Set dst (Replicate4S src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11408,7 +10429,9 @@
 instruct Repl4S_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate4S zero));
   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11416,7 +10439,9 @@
 instruct Repl4C_reg(regD dst, regD src) %{
   match(Set dst (Replicate4C src));
   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode( pshufd_4x16(dst, src));
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11425,7 +10450,10 @@
   match(Set dst (Replicate4C src));
   format %{ "MOVD    $dst,$src\n\t"
             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11433,7 +10461,9 @@
 instruct Repl4C_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate4C zero));
   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11441,7 +10471,9 @@
 instruct Repl2I_reg(regD dst, regD src) %{
   match(Set dst (Replicate2I src));
   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode( pshufd(dst, src, 0x00));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11450,7 +10482,10 @@
   match(Set dst (Replicate2I src));
   format %{ "MOVD   $dst,$src\n\t"
             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11458,7 +10493,9 @@
 instruct Repl2I_immI0(regD dst, immI0 zero) %{
   match(Set dst (Replicate2I zero));
   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11466,7 +10503,9 @@
 instruct Repl2F_reg(regD dst, regD src) %{
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11474,7 +10513,9 @@
 instruct Repl2F_regF(regD dst, regF src) %{
   match(Set dst (Replicate2F src));
   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode( pshufd(dst, src, 0xe0));
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -11482,7 +10523,9 @@
 instruct Repl2F_immF0(regD dst, immF0 zero) %{
   match(Set dst (Replicate2F zero));
   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
-  ins_encode( pxor(dst, dst));
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
   ins_pipe( fpu_reg_reg );
 %}
 
@@ -12162,12 +11205,12 @@
   effect(KILL rcx, KILL cr);
 
   ins_cost(1100);  // slightly larger than the next version
-  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
-            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
+            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
     "miss:\t" %}
 
@@ -12185,12 +11228,12 @@
   effect(KILL rcx, KILL result);
 
   ins_cost(1000);
-  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
             "jne,s   miss\t\t# Missed: flags nz\n\t"
-            "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
+            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
     "miss:\t" %}
 
   opcode(0x0); // No need to XOR RDI
@@ -12358,13 +11401,13 @@
 // inlined locking and unlocking
 
 instruct cmpFastLock(rFlagsReg cr,
-                     rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
+                     rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
 %{
   match(Set cr (FastLock object box));
-  effect(TEMP tmp, TEMP scr);
+  effect(TEMP tmp, TEMP scr, USE_KILL box);
 
   ins_cost(300);
-  format %{ "fastlock $object,$box,$tmp,$scr" %}
+  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
   ins_encode(Fast_Lock(object, box, tmp, scr));
   ins_pipe(pipe_slow);
 %}
@@ -12373,10 +11416,10 @@
                        rRegP object, rax_RegP box, rRegP tmp)
 %{
   match(Set cr (FastUnlock object box));
-  effect(TEMP tmp);
+  effect(TEMP tmp, USE_KILL box);
 
   ins_cost(300);
-  format %{ "fastunlock $object, $box, $tmp" %}
+  format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
   ins_encode(Fast_Unlock(object, box, tmp));
   ins_pipe(pipe_slow);
 %}
@@ -12583,6 +11626,21 @@
 %}
 
 
+// ============================================================================
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
+// for this guy.
+instruct tlsLoadP(r15_RegP dst) %{
+  match(Set dst (ThreadLocal));
+  effect(DEF dst);
+
+  size(0);
+  format %{ "# TLS is in R15" %}
+  ins_encode( /*empty encoding*/ );
+  ins_pipe(ialu_reg_reg);
+%}
+
+
 //----------PEEPHOLE RULES-----------------------------------------------------
 // These must follow all instruction definitions as they use the names
 // defined in the instructions definitions.
--- a/src/cpu/zero/vm/frame_zero.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/zero/vm/frame_zero.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -418,7 +418,7 @@
   }
 }
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
 
--- a/src/cpu/zero/vm/frame_zero.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/zero/vm/frame_zero.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -72,6 +72,10 @@
   return fp() + 1;
 }
 
+inline intptr_t* frame::real_fp() const {
+  return fp();
+}
+
 inline intptr_t* frame::link() const {
   ShouldNotCallThis();
 }
--- a/src/cpu/zero/vm/methodHandles_zero.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/cpu/zero/vm/methodHandles_zero.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -29,43 +29,3 @@
   adapter_code_size = 0
 };
 
-#define TARGET_ARCH_NYI_6939861 1
-// ..#ifdef TARGET_ARCH_NYI_6939861
-// ..  // Here are some backward compatible declarations until the 6939861 ports are updated.
-// ..  #define _adapter_flyby    (_EK_LIMIT + 10)
-// ..  #define _adapter_ricochet (_EK_LIMIT + 11)
-// ..  #define _adapter_opt_spread_1    _adapter_opt_spread_1_ref
-// ..  #define _adapter_opt_spread_more _adapter_opt_spread_ref
-// ..  enum {
-// ..    _INSERT_NO_MASK   = -1,
-// ..    _INSERT_REF_MASK  = 0,
-// ..    _INSERT_INT_MASK  = 1,
-// ..    _INSERT_LONG_MASK = 3
-// ..  };
-// ..  static void get_ek_bound_mh_info(EntryKind ek, BasicType& arg_type, int& arg_mask, int& arg_slots) {
-// ..    arg_type = ek_bound_mh_arg_type(ek);
-// ..    arg_mask = 0;
-// ..    arg_slots = type2size[arg_type];;
-// ..  }
-// ..  static void get_ek_adapter_opt_swap_rot_info(EntryKind ek, int& swap_bytes, int& rotate) {
-// ..    int swap_slots = ek_adapter_opt_swap_slots(ek);
-// ..    rotate = ek_adapter_opt_swap_mode(ek);
-// ..    swap_bytes = swap_slots * Interpreter::stackElementSize;
-// ..  }
-// ..  static int get_ek_adapter_opt_spread_info(EntryKind ek) {
-// ..    return ek_adapter_opt_spread_count(ek);
-// ..  }
-// ..
-// ..  static void insert_arg_slots(MacroAssembler* _masm,
-// ..                               RegisterOrConstant arg_slots,
-// ..                               int arg_mask,
-// ..                               Register argslot_reg,
-// ..                               Register temp_reg, Register temp2_reg, Register temp3_reg = noreg);
-// ..
-// ..  static void remove_arg_slots(MacroAssembler* _masm,
-// ..                               RegisterOrConstant arg_slots,
-// ..                               Register argslot_reg,
-// ..                               Register temp_reg, Register temp2_reg, Register temp3_reg = noreg);
-// ..
-// ..  static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
-// ..#endif //TARGET_ARCH_NYI_6939861
--- a/src/os/bsd/vm/decoder_bsd.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "prims/jvm.h"
-#include "utilities/decoder.hpp"
-
-#include <cxxabi.h>
-
-#ifdef __APPLE__
-
-void Decoder::initialize() {
-  _initialized = true;
-}
-
-void Decoder::uninitialize() {
-  _initialized = false;
-}
-
-bool Decoder::can_decode_C_frame_in_vm() {
-  return false;
-}
-
-Decoder::decoder_status Decoder::decode(address addr, const char* filepath, char *buf, int buflen, int *offset) {
-  return symbol_not_found;
-}
-
-
-#endif
-
-bool Decoder::demangle(const char* symbol, char *buf, int buflen) {
-  int   status;
-  char* result;
-  size_t size = (size_t)buflen;
-
-  // Don't pass buf to __cxa_demangle. In case of the 'buf' is too small,
-  // __cxa_demangle will call system "realloc" for additional memory, which
-  // may use different malloc/realloc mechanism that allocates 'buf'.
-  if ((result = abi::__cxa_demangle(symbol, NULL, NULL, &status)) != NULL) {
-    jio_snprintf(buf, buflen, "%s", result);
-      // call c library's free
-      ::free(result);
-      return true;
-  }
-  return false;
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os/bsd/vm/decoder_machO.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#ifdef __APPLE__
+#include "decoder_machO.hpp"
+#endif
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os/bsd/vm/decoder_machO.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_BSD_VM_DECODER_MACHO_HPP
+#define OS_BSD_VM_DECODER_MACHO_HPP
+
+#ifdef __APPLE__
+
+#include "utilities/decoder.hpp"
+
+// Just a placeholder for now; a real implementation should derive
+// from AbstractDecoder
+class MachODecoder : public NullDecoder {
+public:
+  MachODecoder() { }
+  ~MachODecoder() { }
+};
+
+#endif
+
+#endif // OS_BSD_VM_DECODER_MACHO_HPP
+
--- a/src/os/bsd/vm/jvm_bsd.h	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/bsd/vm/jvm_bsd.h	Mon Feb 27 15:06:36 2012 -0800
@@ -33,7 +33,6 @@
 // All local includes have been commented out.
 */
 
-
 #ifndef JVM_MD_H
 #define JVM_MD_H
 
@@ -59,6 +58,7 @@
 
 #include <dirent.h>             /* For DIR */
 #include <sys/param.h>          /* For MAXPATHLEN */
+#include <sys/socket.h>         /* For socklen_t */
 #include <unistd.h>             /* For F_OK, R_OK, W_OK */
 
 #define JNI_ONLOAD_SYMBOLS      {"JNI_OnLoad"}
@@ -128,8 +128,4 @@
 #endif
 #endif /* JVM_MD_H */
 
-// Reconciliation History
-// jvm_solaris.h        1.6 99/06/22 16:38:47
-// End
-
 #endif // OS_BSD_VM_JVM_BSD_H
--- a/src/os/bsd/vm/osThread_bsd.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/bsd/vm/osThread_bsd.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,7 +49,11 @@
 
 void OSThread::pd_initialize() {
   assert(this != NULL, "check");
+#ifdef __APPLE__
+  _thread_id        = 0;
+#else
   _thread_id        = NULL;
+#endif
   _pthread_id       = NULL;
   _siginfo = NULL;
   _ucontext = NULL;
--- a/src/os/bsd/vm/osThread_bsd.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/bsd/vm/osThread_bsd.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,10 +40,17 @@
  private:
 
 #ifdef _ALLBSD_SOURCE
-  // _thread_id and _pthread_id are the same on BSD
-  // keep both to minimize code divergence in os_bsd.cpp
+
+#ifdef __APPLE__
+  thread_t  _thread_id;
+#else
   pthread_t _thread_id;
+#endif
+
+  // _pthread_id is the pthread id, which is used by library calls
+  // (e.g. pthread_kill).
   pthread_t _pthread_id;
+
 #else
   // _thread_id is kernel thread id (similar to LWP id on Solaris). Each
   // thread has a unique thread_id (BsdThreads or NPTL). It can be used
@@ -64,9 +71,15 @@
   void    set_caller_sigmask(sigset_t sigmask)  { _caller_sigmask = sigmask; }
 
 #ifdef _ALLBSD_SOURCE
+#ifdef __APPLE__
+  thread_t thread_id() const {
+    return _thread_id;
+  }
+#else
   pthread_t thread_id() const {
     return _thread_id;
   }
+#endif
 #else
   pid_t thread_id() const {
     return _thread_id;
@@ -84,9 +97,15 @@
   }
 #endif // ASSERT
 #ifdef _ALLBSD_SOURCE
+#ifdef __APPLE__
+  void set_thread_id(thread_t id) {
+    _thread_id = id;
+  }
+#else
   void set_thread_id(pthread_t id) {
     _thread_id = id;
   }
+#endif
 #else
   void set_thread_id(pid_t id) {
     _thread_id = id;
--- a/src/os/bsd/vm/os_bsd.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/bsd/vm/os_bsd.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -150,7 +150,6 @@
 
 // for timer info max values which include all bits
 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
-#define SEC_IN_NANOSECS  1000000000LL
 
 #define LARGEPAGES_BIT (1 << 6)
 ////////////////////////////////////////////////////////////////////////////////
@@ -302,6 +301,12 @@
 #error Add appropriate cpu_arch setting
 #endif
 
+// Compiler variant
+#ifdef COMPILER2
+#define COMPILER_VARIANT "server"
+#else
+#define COMPILER_VARIANT "client"
+#endif
 
 #ifndef _ALLBSD_SOURCE
 // pid_t gettid()
@@ -563,6 +568,25 @@
             sprintf(ld_library_path, "%s:%s", v, t);
             free(t);
         }
+
+#ifdef __APPLE__
+        // Apple's Java6 has "." at the beginning of java.library.path.
+        // OpenJDK on Windows has "." at the end of java.library.path.
+        // OpenJDK on Linux and Solaris don't have "." in java.library.path
+        // at all. To ease the transition from Apple's Java6 to OpenJDK7,
+        // "." is appended to the end of java.library.path. Yes, this
+        // could cause a change in behavior, but Apple's Java6 behavior
+        // can be achieved by putting "." at the beginning of the
+        // JAVA_LIBRARY_PATH environment variable.
+        {
+            char *t = ld_library_path;
+            // that's +3 for appending ":." and the trailing '\0'
+            ld_library_path = (char *) malloc(strlen(t) + 3);
+            sprintf(ld_library_path, "%s:%s", t, ".");
+            free(t);
+        }
+#endif
+
         Arguments::set_library_path(ld_library_path);
     }
 
@@ -974,8 +998,13 @@
   }
 
 #ifdef _ALLBSD_SOURCE
+#ifdef __APPLE__
+  // thread_id is mach thread on macos
+  osthread->set_thread_id(::mach_thread_self());
+#else
   // thread_id is pthread_id on BSD
   osthread->set_thread_id(::pthread_self());
+#endif
 #else
   // thread_id is kernel thread id (similar to Solaris LWP id)
   osthread->set_thread_id(os::Bsd::gettid());
@@ -1166,7 +1195,11 @@
 
   // Store pthread info into the OSThread
 #ifdef _ALLBSD_SOURCE
+#ifdef __APPLE__
+  osthread->set_thread_id(::mach_thread_self());
+#else
   osthread->set_thread_id(::pthread_self());
+#endif
 #else
   osthread->set_thread_id(os::Bsd::gettid());
 #endif
@@ -1783,7 +1816,13 @@
   return n;
 }
 
-intx os::current_thread_id() { return (intx)pthread_self(); }
+intx os::current_thread_id() {
+#ifdef __APPLE__
+  return (intx)::mach_thread_self();
+#else
+  return (intx)::pthread_self();
+#endif
+}
 int os::current_process_id() {
 
   // Under the old bsd thread library, bsd gives each thread
@@ -1921,7 +1960,7 @@
     return true;
   } else if (dlinfo.dli_fname != NULL && dlinfo.dli_fbase != 0) {
     if (Decoder::decode((address)(addr - (address)dlinfo.dli_fbase),
-       dlinfo.dli_fname, buf, buflen, offset) == Decoder::no_error) {
+       buf, buflen, offset, dlinfo.dli_fname)) {
        return true;
     }
   }
@@ -2508,7 +2547,7 @@
 
 static char saved_jvm_path[MAXPATHLEN] = {0};
 
-// Find the full path to the current module, libjvm.so or libjvm_g.so
+// Find the full path to the current module, libjvm or libjvm_g
 void os::jvm_path(char *buf, jint buflen) {
   // Error checking.
   if (buflen < MAXPATHLEN) {
@@ -2533,11 +2572,11 @@
 
   if (Arguments::created_by_gamma_launcher()) {
     // Support for the gamma launcher.  Typical value for buf is
-    // "<JAVA_HOME>/jre/lib/<arch>/<vmtype>/libjvm.so".  If "/jre/lib/" appears at
+    // "<JAVA_HOME>/jre/lib/<arch>/<vmtype>/libjvm".  If "/jre/lib/" appears at
     // the right place in the string, then assume we are installed in a JDK and
-    // we're done.  Otherwise, check for a JAVA_HOME environment variable and fix
-    // up the path so it looks like libjvm.so is installed there (append a
-    // fake suffix hotspot/libjvm.so).
+    // we're done.  Otherwise, check for a JAVA_HOME environment variable and
+    // construct a path to the JVM being overridden.
+
     const char *p = buf + strlen(buf) - 1;
     for (int count = 0; p > buf && count < 5; ++count) {
       for (--p; p > buf && *p != '/'; --p)
@@ -2551,7 +2590,7 @@
         char* jrelib_p;
         int len;
 
-        // Check the current module name "libjvm.so" or "libjvm_g.so".
+        // Check the current module name "libjvm" or "libjvm_g".
         p = strrchr(buf, '/');
         assert(strstr(p, "/libjvm") == p, "invalid library name");
         p = strstr(p, "_g") ? "_g" : "";
@@ -2564,19 +2603,32 @@
         // modules image doesn't have "jre" subdirectory
         len = strlen(buf);
         jrelib_p = buf + len;
-        snprintf(jrelib_p, buflen-len, "/jre/lib/%s", cpu_arch);
+
+        // Add the appropriate library subdir
+        snprintf(jrelib_p, buflen-len, "/jre/lib");
         if (0 != access(buf, F_OK)) {
-          snprintf(jrelib_p, buflen-len, "/lib/%s", cpu_arch);
+          snprintf(jrelib_p, buflen-len, "/lib");
         }
 
+        // Add the appropriate client or server subdir
+        len = strlen(buf);
+        jrelib_p = buf + len;
+        snprintf(jrelib_p, buflen-len, "/%s", COMPILER_VARIANT);
+        if (0 != access(buf, F_OK)) {
+          snprintf(jrelib_p, buflen-len, "");
+        }
+
+        // If the path exists within JAVA_HOME, add the JVM library name
+        // to complete the path to the JVM being overridden.  Otherwise fall
+        // back to the path of the current library.
         if (0 == access(buf, F_OK)) {
-          // Use current module name "libjvm[_g].so" instead of
-          // "libjvm"debug_only("_g")".so" since for fastdebug version
-          // we should have "libjvm.so" but debug_only("_g") adds "_g"!
+          // Use current module name "libjvm[_g]" instead of
+          // "libjvm"debug_only("_g")"" since for fastdebug version
+          // we should have "libjvm" but debug_only("_g") adds "_g"!
           len = strlen(buf);
-          snprintf(buf + len, buflen-len, "/hotspot/libjvm%s.so", p);
+          snprintf(buf + len, buflen-len, "/libjvm%s%s", p, JNI_LIB_SUFFIX);
         } else {
-          // Go back to path of .so
+          // Fall back to path of current library
           rp = realpath(dli_fname, buf);
           if (rp == NULL)
             return;
@@ -2836,7 +2888,7 @@
 #endif
 }
 
-void os::free_memory(char *addr, size_t bytes) {
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) {
   ::madvise(addr, bytes, MADV_DONTNEED);
 }
 
@@ -3445,8 +3497,6 @@
 // generates a SIGUSRx signal. Note that SIGUSR1 can interfere with
 // SIGSEGV, see 4355769.
 
-const int NANOSECS_PER_MILLISECS = 1000000;
-
 int os::sleep(Thread* thread, jlong millis, bool interruptible) {
   assert(thread == Thread::current(),  "thread consistency check");
 
@@ -3469,7 +3519,7 @@
         // not a guarantee() because JVM should not abort on kernel/glibc bugs
         assert(!Bsd::supports_monotonic_clock(), "time moving backwards");
       } else {
-        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISECS;
+        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
       }
 
       if(millis <= 0) {
@@ -3508,7 +3558,7 @@
         // not a guarantee() because JVM should not abort on kernel/glibc bugs
         assert(!Bsd::supports_monotonic_clock(), "time moving backwards");
       } else {
-        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISECS;
+        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
       }
 
       if(millis <= 0) break ;
@@ -3573,26 +3623,28 @@
 // It is only used when ThreadPriorityPolicy=1 and requires root privilege.
 
 #if defined(_ALLBSD_SOURCE) && !defined(__APPLE__)
-int os::java_to_os_priority[MaxPriority + 1] = {
+int os::java_to_os_priority[CriticalPriority + 1] = {
   19,              // 0 Entry should never be used
 
    0,              // 1 MinPriority
    3,              // 2
    6,              // 3
 
-   10,              // 4
-   15,              // 5 NormPriority
-   18,              // 6
-
-   21,              // 7
-   25,              // 8
-   28,              // 9 NearMaxPriority
-
-   31              // 10 MaxPriority
+  10,              // 4
+  15,              // 5 NormPriority
+  18,              // 6
+
+  21,              // 7
+  25,              // 8
+  28,              // 9 NearMaxPriority
+
+  31,              // 10 MaxPriority
+
+  31               // 11 CriticalPriority
 };
 #elif defined(__APPLE__)
 /* Using Mach high-level priority assignments */
-int os::java_to_os_priority[MaxPriority + 1] = {
+int os::java_to_os_priority[CriticalPriority + 1] = {
    0,              // 0 Entry should never be used (MINPRI_USER)
 
   27,              // 1 MinPriority
@@ -3607,10 +3659,12 @@
   34,              // 8
   35,              // 9 NearMaxPriority
 
-  36               // 10 MaxPriority
+  36,              // 10 MaxPriority
+
+  36               // 11 CriticalPriority
 };
 #else
-int os::java_to_os_priority[MaxPriority + 1] = {
+int os::java_to_os_priority[CriticalPriority + 1] = {
   19,              // 0 Entry should never be used
 
    4,              // 1 MinPriority
@@ -3625,7 +3679,9 @@
   -3,              // 8
   -4,              // 9 NearMaxPriority
 
-  -5               // 10 MaxPriority
+  -5,              // 10 MaxPriority
+
+  -5               // 11 CriticalPriority
 };
 #endif
 
@@ -3641,6 +3697,9 @@
       ThreadPriorityPolicy = 0;
     }
   }
+  if (UseCriticalJavaThreadPriority) {
+    os::java_to_os_priority[MaxPriority] = os::java_to_os_priority[CriticalPriority];
+  }
   return 0;
 }
 
@@ -4197,7 +4256,7 @@
   int rc = os::Bsd::clock_gettime(clockid, &tp);
   assert(rc == 0, "clock_gettime is expected to return 0 code");
 
-  return (tp.tv_sec * SEC_IN_NANOSECS) + tp.tv_nsec;
+  return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
 }
 #endif
 
@@ -5108,9 +5167,9 @@
   struct thread_basic_info tinfo;
   mach_msg_type_number_t tcount = THREAD_INFO_MAX;
   kern_return_t kr;
-  mach_port_t mach_thread;
-
-  mach_thread = pthread_mach_thread_np(thread->osthread()->thread_id());
+  thread_t mach_thread;
+
+  mach_thread = thread->osthread()->thread_id();
   kr = thread_info(mach_thread, THREAD_BASIC_INFO, (thread_info_t)&tinfo, &tcount);
   if (kr != KERN_SUCCESS)
     return -1;
@@ -5522,9 +5581,6 @@
  * is no need to track notifications.
  */
 
-
-#define NANOSECS_PER_SEC 1000000000
-#define NANOSECS_PER_MILLISEC 1000000
 #define MAX_SECS 100000000
 /*
  * This code is common to bsd and solaris and will be moved to a
--- a/src/os/bsd/vm/os_bsd.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/bsd/vm/os_bsd.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -198,15 +198,15 @@
   return ::socket(domain, type, protocol);
 }
 
-inline int os::recv(int fd, char *buf, int nBytes, int flags) {
-  RESTARTABLE_RETURN_INT(::recv(fd, buf, nBytes, (unsigned int) flags));
+inline int os::recv(int fd, char* buf, size_t nBytes, uint flags) {
+  RESTARTABLE_RETURN_INT(::recv(fd, buf, nBytes, flags));
 }
 
-inline int os::send(int fd, char *buf, int nBytes, int flags) {
-  RESTARTABLE_RETURN_INT(::send(fd, buf, nBytes, (unsigned int) flags));
+inline int os::send(int fd, char* buf, size_t nBytes, uint flags) {
+  RESTARTABLE_RETURN_INT(::send(fd, buf, nBytes, flags));
 }
 
-inline int os::raw_send(int fd, char *buf, int nBytes, int flags) {
+inline int os::raw_send(int fd, char* buf, size_t nBytes, uint flags) {
   return os::send(fd, buf, nBytes, flags);
 }
 
@@ -246,57 +246,52 @@
   return ::listen(fd, count);
 }
 
-inline int os::connect(int fd, struct sockaddr *him, int len) {
+inline int os::connect(int fd, struct sockaddr* him, socklen_t len) {
   RESTARTABLE_RETURN_INT(::connect(fd, him, len));
 }
 
-inline int os::accept(int fd, struct sockaddr *him, int *len) {
-  // This cast is from int to unsigned int on bsd.  Since we
-  // only pass the parameter "len" around the vm and don't try to
-  // fetch it's value, this cast is safe for now. The java.net group
-  // may need and want to change this interface someday if socklen_t goes
-  // to 64 bits on some platform that we support.
-
+inline int os::accept(int fd, struct sockaddr* him, socklen_t* len) {
   // At least OpenBSD and FreeBSD can return EINTR from accept.
-  RESTARTABLE_RETURN_INT(::accept(fd, him, (socklen_t *)len));
+  RESTARTABLE_RETURN_INT(::accept(fd, him, len));
 }
 
-inline int os::recvfrom(int fd, char *buf, int nBytes, int flags,
-                         sockaddr *from, int *fromlen) {
-  RESTARTABLE_RETURN_INT(::recvfrom(fd, buf, nBytes, (unsigned int) flags, from, (socklen_t *)fromlen));
+inline int os::recvfrom(int fd, char* buf, size_t nBytes, uint flags,
+                         sockaddr* from, socklen_t* fromlen) {
+  RESTARTABLE_RETURN_INT((int)::recvfrom(fd, buf, nBytes, flags, from, fromlen));
 }
 
-inline int os::sendto(int fd, char *buf, int len, int flags,
-                        struct sockaddr *to, int tolen) {
-  RESTARTABLE_RETURN_INT(::sendto(fd, buf, len, (unsigned int) flags, to, tolen));
+inline int os::sendto(int fd, char* buf, size_t len, uint flags,
+                      struct sockaddr *to, socklen_t tolen) {
+  RESTARTABLE_RETURN_INT((int)::sendto(fd, buf, len, flags, to, tolen));
 }
 
-inline int os::socket_shutdown(int fd, int howto){
+inline int os::socket_shutdown(int fd, int howto) {
   return ::shutdown(fd, howto);
 }
 
-inline int os::bind(int fd, struct sockaddr *him, int len){
+inline int os::bind(int fd, struct sockaddr* him, socklen_t len) {
   return ::bind(fd, him, len);
 }
 
-inline int os::get_sock_name(int fd, struct sockaddr *him, int *len){
-  return ::getsockname(fd, him, (socklen_t *)len);
+inline int os::get_sock_name(int fd, struct sockaddr* him, socklen_t* len) {
+  return ::getsockname(fd, him, len);
 }
 
-inline int os::get_host_name(char* name, int namelen){
+inline int os::get_host_name(char* name, int namelen) {
   return ::gethostname(name, namelen);
 }
 
-inline struct hostent*  os::get_host_by_name(char* name) {
+inline struct hostent* os::get_host_by_name(char* name) {
   return ::gethostbyname(name);
 }
+
 inline int os::get_sock_opt(int fd, int level, int optname,
-                             char *optval, int* optlen){
-  return ::getsockopt(fd, level, optname, optval, (socklen_t *)optlen);
+                            char *optval, socklen_t* optlen) {
+  return ::getsockopt(fd, level, optname, optval, optlen);
 }
 
 inline int os::set_sock_opt(int fd, int level, int optname,
-                             const char *optval, int optlen){
+                            const char* optval, socklen_t optlen) {
   return ::setsockopt(fd, level, optname, optval, optlen);
 }
 #endif // OS_BSD_VM_OS_BSD_INLINE_HPP
--- a/src/os/linux/vm/decoder_linux.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/linux/vm/decoder_linux.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -23,11 +23,11 @@
  */
 
 #include "prims/jvm.h"
-#include "utilities/decoder.hpp"
+#include "utilities/decoder_elf.hpp"
 
 #include <cxxabi.h>
 
-bool Decoder::demangle(const char* symbol, char *buf, int buflen) {
+bool ElfDecoder::demangle(const char* symbol, char *buf, int buflen) {
   int   status;
   char* result;
   size_t size = (size_t)buflen;
@@ -43,3 +43,4 @@
   }
   return false;
 }
+
--- a/src/os/linux/vm/jvm_linux.h	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/linux/vm/jvm_linux.h	Mon Feb 27 15:06:36 2012 -0800
@@ -33,7 +33,6 @@
 // All local includes have been commented out.
 */
 
-
 #ifndef JVM_MD_H
 #define JVM_MD_H
 
@@ -44,6 +43,7 @@
 
 #include <dirent.h>             /* For DIR */
 #include <sys/param.h>          /* For MAXPATHLEN */
+#include <sys/socket.h>         /* For socklen_t */
 #include <unistd.h>             /* For F_OK, R_OK, W_OK */
 
 #define JNI_ONLOAD_SYMBOLS      {"JNI_OnLoad"}
@@ -95,8 +95,4 @@
 
 #endif /* JVM_MD_H */
 
-// Reconciliation History
-// jvm_solaris.h        1.6 99/06/22 16:38:47
-// End
-
 #endif // OS_LINUX_VM_JVM_LINUX_H
--- a/src/os/linux/vm/os_linux.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/linux/vm/os_linux.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -127,7 +127,6 @@
 
 // for timer info max values which include all bits
 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
-#define SEC_IN_NANOSECS  1000000000LL
 
 #define LARGEPAGES_BIT (1 << 6)
 ////////////////////////////////////////////////////////////////////////////////
@@ -1733,7 +1732,7 @@
     return true;
   } else if (dlinfo.dli_fname != NULL && dlinfo.dli_fbase != 0) {
     if (Decoder::decode((address)(addr - (address)dlinfo.dli_fbase),
-       dlinfo.dli_fname, buf, buflen, offset) == Decoder::no_error) {
+        buf, buflen, offset, dlinfo.dli_fname)) {
        return true;
     }
   }
@@ -2547,8 +2546,8 @@
   }
 }
 
-void os::free_memory(char *addr, size_t bytes) {
-  commit_memory(addr, bytes, false);
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) {
+  commit_memory(addr, bytes, alignment_hint, false);
 }
 
 void os::numa_make_global(char *addr, size_t bytes) {
@@ -3259,8 +3258,6 @@
 // generates a SIGUSRx signal. Note that SIGUSR1 can interfere with
 // SIGSEGV, see 4355769.
 
-const int NANOSECS_PER_MILLISECS = 1000000;
-
 int os::sleep(Thread* thread, jlong millis, bool interruptible) {
   assert(thread == Thread::current(),  "thread consistency check");
 
@@ -3283,7 +3280,7 @@
         // not a guarantee() because JVM should not abort on kernel/glibc bugs
         assert(!Linux::supports_monotonic_clock(), "time moving backwards");
       } else {
-        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISECS;
+        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
       }
 
       if(millis <= 0) {
@@ -3322,7 +3319,7 @@
         // not a guarantee() because JVM should not abort on kernel/glibc bugs
         assert(!Linux::supports_monotonic_clock(), "time moving backwards");
       } else {
-        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISECS;
+        millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;
       }
 
       if(millis <= 0) break ;
@@ -3386,7 +3383,7 @@
 // this reason, the code should not be used as default (ThreadPriorityPolicy=0).
 // It is only used when ThreadPriorityPolicy=1 and requires root privilege.
 
-int os::java_to_os_priority[MaxPriority + 1] = {
+int os::java_to_os_priority[CriticalPriority + 1] = {
   19,              // 0 Entry should never be used
 
    4,              // 1 MinPriority
@@ -3401,7 +3398,9 @@
   -3,              // 8
   -4,              // 9 NearMaxPriority
 
-  -5               // 10 MaxPriority
+  -5,              // 10 MaxPriority
+
+  -5               // 11 CriticalPriority
 };
 
 static int prio_init() {
@@ -3416,6 +3415,9 @@
       ThreadPriorityPolicy = 0;
     }
   }
+  if (UseCriticalJavaThreadPriority) {
+    os::java_to_os_priority[MaxPriority] = os::java_to_os_priority[CriticalPriority];
+  }
   return 0;
 }
 
@@ -3931,7 +3933,7 @@
   int rc = os::Linux::clock_gettime(clockid, &tp);
   assert(rc == 0, "clock_gettime is expected to return 0 code");
 
-  return (tp.tv_sec * SEC_IN_NANOSECS) + tp.tv_nsec;
+  return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
 }
 
 /////
@@ -5172,9 +5174,6 @@
  * is no need to track notifications.
  */
 
-
-#define NANOSECS_PER_SEC 1000000000
-#define NANOSECS_PER_MILLISEC 1000000
 #define MAX_SECS 100000000
 /*
  * This code is common to linux and solaris and will be moved to a
--- a/src/os/linux/vm/os_linux.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/linux/vm/os_linux.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -202,15 +202,15 @@
   return ::socket(domain, type, protocol);
 }
 
-inline int os::recv(int fd, char *buf, int nBytes, int flags) {
-  RESTARTABLE_RETURN_INT(::recv(fd, buf, nBytes, (unsigned int) flags));
+inline int os::recv(int fd, char* buf, size_t nBytes, uint flags) {
+  RESTARTABLE_RETURN_INT(::recv(fd, buf, nBytes, flags));
 }
 
-inline int os::send(int fd, char *buf, int nBytes, int flags) {
-  RESTARTABLE_RETURN_INT(::send(fd, buf, nBytes, (unsigned int) flags));
+inline int os::send(int fd, char* buf, size_t nBytes, uint flags) {
+  RESTARTABLE_RETURN_INT(::send(fd, buf, nBytes, flags));
 }
 
-inline int os::raw_send(int fd, char *buf, int nBytes, int flags) {
+inline int os::raw_send(int fd, char* buf, size_t nBytes, uint flags) {
   return os::send(fd, buf, nBytes, flags);
 }
 
@@ -250,57 +250,53 @@
   return ::listen(fd, count);
 }
 
-inline int os::connect(int fd, struct sockaddr *him, int len) {
+inline int os::connect(int fd, struct sockaddr* him, socklen_t len) {
   RESTARTABLE_RETURN_INT(::connect(fd, him, len));
 }
 
-inline int os::accept(int fd, struct sockaddr *him, int *len) {
-  // This cast is from int to unsigned int on linux.  Since we
-  // only pass the parameter "len" around the vm and don't try to
-  // fetch it's value, this cast is safe for now. The java.net group
-  // may need and want to change this interface someday if socklen_t goes
-  // to 64 bits on some platform that we support.
-  // Linux doc says this can't return EINTR, unlike accept() on Solaris
-
-  return ::accept(fd, him, (socklen_t *)len);
+inline int os::accept(int fd, struct sockaddr* him, socklen_t* len) {
+  // Linux doc says this can't return EINTR, unlike accept() on Solaris.
+  // But see attachListener_linux.cpp, LinuxAttachListener::dequeue().
+  return (int)::accept(fd, him, len);
 }
 
-inline int os::recvfrom(int fd, char *buf, int nBytes, int flags,
-                         sockaddr *from, int *fromlen) {
-  RESTARTABLE_RETURN_INT(::recvfrom(fd, buf, nBytes, (unsigned int) flags, from, (socklen_t *)fromlen));
+inline int os::recvfrom(int fd, char* buf, size_t nBytes, uint flags,
+                        sockaddr* from, socklen_t* fromlen) {
+  RESTARTABLE_RETURN_INT((int)::recvfrom(fd, buf, nBytes, flags, from, fromlen));
 }
 
-inline int os::sendto(int fd, char *buf, int len, int flags,
-                        struct sockaddr *to, int tolen) {
-  RESTARTABLE_RETURN_INT(::sendto(fd, buf, len, (unsigned int) flags, to, tolen));
+inline int os::sendto(int fd, char* buf, size_t len, uint flags,
+                      struct sockaddr* to, socklen_t tolen) {
+  RESTARTABLE_RETURN_INT((int)::sendto(fd, buf, len, flags, to, tolen));
 }
 
-inline int os::socket_shutdown(int fd, int howto){
+inline int os::socket_shutdown(int fd, int howto) {
   return ::shutdown(fd, howto);
 }
 
-inline int os::bind(int fd, struct sockaddr *him, int len){
+inline int os::bind(int fd, struct sockaddr* him, socklen_t len) {
   return ::bind(fd, him, len);
 }
 
-inline int os::get_sock_name(int fd, struct sockaddr *him, int *len){
-  return ::getsockname(fd, him, (socklen_t *)len);
+inline int os::get_sock_name(int fd, struct sockaddr* him, socklen_t* len) {
+  return ::getsockname(fd, him, len);
 }
 
-inline int os::get_host_name(char* name, int namelen){
+inline int os::get_host_name(char* name, int namelen) {
   return ::gethostname(name, namelen);
 }
 
-inline struct hostent*  os::get_host_by_name(char* name) {
+inline struct hostent* os::get_host_by_name(char* name) {
   return ::gethostbyname(name);
 }
+
 inline int os::get_sock_opt(int fd, int level, int optname,
-                             char *optval, int* optlen){
-  return ::getsockopt(fd, level, optname, optval, (socklen_t *)optlen);
+                            char* optval, socklen_t* optlen) {
+  return ::getsockopt(fd, level, optname, optval, optlen);
 }
 
 inline int os::set_sock_opt(int fd, int level, int optname,
-                             const char *optval, int optlen){
+                            const char* optval, socklen_t optlen) {
   return ::setsockopt(fd, level, optname, optval, optlen);
 }
 #endif // OS_LINUX_VM_OS_LINUX_INLINE_HPP
--- a/src/os/posix/launcher/java_md.c	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/posix/launcher/java_md.c	Mon Feb 27 15:06:36 2012 -0800
@@ -701,6 +701,14 @@
     char libjava[MAXPATHLEN];
 
     if (GetApplicationHome(path, pathsize)) {
+
+        /* Is the JRE universal, i.e. no arch dir? */
+        sprintf(libjava, "%s/jre/lib/" JAVA_DLL, path);
+        if (access(libjava, F_OK) == 0) {
+            strcat(path, "/jre");
+            goto found;
+        }
+
         /* Is JRE co-located with the application? */
         sprintf(libjava, "%s/lib/%s/" JAVA_DLL, path, arch);
         if (access(libjava, F_OK) == 0) {
@@ -734,7 +742,7 @@
     ifn->GetDefaultJavaVMInitArgs = JNI_GetDefaultJavaVMInitArgs;
     return JNI_TRUE;
 #else
-   Dl_info dlinfo;
+    Dl_info dlinfo;
     void *libjvm;
 
     if (_launcher_debug) {
--- a/src/os/posix/vm/os_posix.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/posix/vm/os_posix.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -59,6 +59,10 @@
   VMError::report_coredump_status(buffer, success);
 }
 
+int os::get_last_error() {
+  return errno;
+}
+
 bool os::is_debugger_attached() {
   // not implemented
   return false;
--- a/src/os/solaris/vm/decoder_solaris.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/solaris/vm/decoder_solaris.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -22,10 +22,11 @@
  *
  */
 
-#include "utilities/decoder.hpp"
+#include "utilities/decoder_elf.hpp"
 
 #include <demangle.h>
 
-bool Decoder::demangle(const char* symbol, char *buf, int buflen) {
+bool ElfDecoder::demangle(const char* symbol, char *buf, int buflen) {
   return !cplus_demangle(symbol, buf, (size_t)buflen);
 }
+
--- a/src/os/solaris/vm/jvm_solaris.h	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/solaris/vm/jvm_solaris.h	Mon Feb 27 15:06:36 2012 -0800
@@ -33,7 +33,6 @@
 // All local includes have been commented out.
 */
 
-
 #ifndef JVM_MD_H
 #define JVM_MD_H
 
@@ -44,6 +43,7 @@
 
 #include <dirent.h>             /* For DIR */
 #include <sys/param.h>          /* For MAXPATHLEN */
+#include <sys/socket.h>         /* For socklen_t */
 #include <unistd.h>             /* For F_OK, R_OK, W_OK */
 #include <sys/int_types.h>      /* for intptr_t types (64 Bit cleanliness) */
 
@@ -82,7 +82,6 @@
 #define JVM_O_EXCL       O_EXCL
 #define JVM_O_CREAT      O_CREAT
 
-
 /* Signal definitions */
 
 #define BREAK_SIGNAL     SIGQUIT           /* Thread dumping support.    */
--- a/src/os/solaris/vm/osThread_solaris.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/solaris/vm/osThread_solaris.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,17 +28,17 @@
 // This is embedded via include into the class OSThread
 
  private:
-
-  thread_t _thread_id;      // Solaris thread id
-  unsigned int  _lwp_id;    // lwp ID, only used with bound threads
-  sigset_t _caller_sigmask; // Caller's signal mask
-  bool _vm_created_thread;  // true if the VM create this thread
-                            // false if primary thread or attached thread
+  thread_t _thread_id;         // Solaris thread id
+  uint     _lwp_id;            // lwp ID, only used with bound threads
+  int      _native_priority;   // Saved native priority when starting
+                               // a bound thread
+  sigset_t _caller_sigmask;    // Caller's signal mask
+  bool     _vm_created_thread; // true if the VM created this thread,
+                               // false if primary thread or attached thread
  public:
-
-  thread_t thread_id() const      { return _thread_id; }
-
-  unsigned int lwp_id() const     { return _lwp_id; }
+  thread_t thread_id() const       { return _thread_id; }
+  uint     lwp_id() const          { return _lwp_id; }
+  int      native_priority() const { return _native_priority; }
 
   // Set and get state of _vm_created_thread flag
   void set_vm_created()           { _vm_created_thread = true; }
@@ -62,8 +62,9 @@
     return true;
   }
 #endif
-  void set_thread_id(thread_t id) { _thread_id = id;   }
-  void set_lwp_id(unsigned int id){ _lwp_id = id;   }
+  void set_thread_id(thread_t id)    { _thread_id = id; }
+  void set_lwp_id(uint id)           { _lwp_id = id; }
+  void set_native_priority(int prio) { _native_priority = prio; }
 
  // ***************************************************************
  // interrupt support.  interrupts (using signals) are used to get
--- a/src/os/solaris/vm/os_solaris.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/solaris/vm/os_solaris.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -114,6 +114,7 @@
 # include <sys/rtpriocntl.h>
 # include <sys/tspriocntl.h>
 # include <sys/iapriocntl.h>
+# include <sys/fxpriocntl.h>
 # include <sys/loadavg.h>
 # include <string.h>
 # include <stdio.h>
@@ -129,8 +130,8 @@
 #ifdef _GNU_SOURCE
 // See bug #6514594
 extern "C" int madvise(caddr_t, size_t, int);
-extern "C"  int memcntl(caddr_t addr, size_t len, int cmd, caddr_t  arg,
-     int attr, int mask);
+extern "C" int memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg,
+                       int attr, int mask);
 #endif //_GNU_SOURCE
 
 /*
@@ -215,8 +216,9 @@
 #define MaximumPriority 127
 
 // Values for ThreadPriorityPolicy == 1
-int prio_policy1[MaxPriority+1] = { -99999, 0, 16, 32, 48, 64,
-                                        80, 96, 112, 124, 127 };
+int prio_policy1[CriticalPriority+1] = {
+  -99999,  0, 16,  32,  48,  64,
+          80, 96, 112, 124, 127, 127 };
 
 // System parameters used internally
 static clock_t clock_tics_per_sec = 100;
@@ -1048,15 +1050,22 @@
   }
 
   // If the creator called set priority before we started,
-  // we need to call set priority now that we have an lwp.
-  // Get the priority from libthread and set the priority
-  // for the new Solaris lwp.
+  // we need to call set_native_priority now that we have an lwp.
+  // We used to read the priority back with thr_getprio (having
+  // called thr_setprio way back in create_thread) and pass it to
+  // set_native_priority, but Solaris scales the priority taken
+  // from java_to_os_priority, so the value read back here was
+  // garbage rather than the original java_to_os_priority entry.
+  // Instead, we now save the requested native priority in the
+  // osThread and recall it here.
+
   if ( osthr->thread_id() != -1 ) {
     if ( UseThreadPriorities ) {
-      thr_getprio(osthr->thread_id(), &prio);
+      int prio = osthr->native_priority();
       if (ThreadPriorityVerbose) {
-        tty->print_cr("Starting Thread " INTPTR_FORMAT ", LWP is " INTPTR_FORMAT ", setting priority: %d\n",
-                      osthr->thread_id(), osthr->lwp_id(), prio );
+        tty->print_cr("Starting Thread " INTPTR_FORMAT ", LWP is "
+                      INTPTR_FORMAT ", setting priority: %d\n",
+                      osthr->thread_id(), osthr->lwp_id(), prio);
       }
       os::set_native_priority(thread, prio);
     }
@@ -1353,13 +1362,12 @@
   // Remember that we created this thread so we can set priority on it
   osthread->set_vm_created();
 
-  // Set the default thread priority otherwise use NormalPriority
-
-  if ( UseThreadPriorities ) {
-     thr_setprio(tid, (DefaultThreadPriority == -1) ?
+  // Set the default thread priority.  If using bound threads, setting
+  // lwp priority will be delayed until thread start.
+  set_native_priority(thread,
+                      DefaultThreadPriority == -1 ?
                         java_to_os_priority[NormPriority] :
                         DefaultThreadPriority);
-  }
 
   // Initial thread state is INITIALIZED, not SUSPENDED
   osthread->set_state(INITIALIZED);
@@ -1674,7 +1682,6 @@
 }
 
 
-const int NANOSECS_PER_MILLISECS = 1000000;
 // gethrtime can move backwards if read from one cpu and then a different cpu
 // getTimeNanos is guaranteed to not move backward on Solaris
 // local spinloop created as faster for a CAS on an int than
@@ -1803,7 +1810,7 @@
 // getTimeMillis guaranteed to not move backwards on Solaris
 jlong getTimeMillis() {
   jlong nanotime = getTimeNanos();
-  return (jlong)(nanotime / NANOSECS_PER_MILLISECS);
+  return (jlong)(nanotime / NANOSECS_PER_MILLISEC);
 }
 
 // Must return millis since Jan 1 1970 for JVM_CurrentTimeMillis
@@ -1998,7 +2005,7 @@
       }
       if (dlinfo.dli_fname != NULL && dlinfo.dli_fbase != 0) {
         if (Decoder::decode((address)(addr - (address)dlinfo.dli_fbase),
-          dlinfo.dli_fname, buf, buflen, offset) == Decoder::no_error) {
+           buf, buflen, offset, dlinfo.dli_fname)) {
           return true;
         }
       }
@@ -2016,7 +2023,7 @@
         return true;
       } else if (dlinfo.dli_fname != NULL && dlinfo.dli_fbase != 0) {
         if (Decoder::decode((address)(addr - (address)dlinfo.dli_fbase),
-          dlinfo.dli_fname, buf, buflen, offset) == Decoder::no_error) {
+          buf, buflen, offset, dlinfo.dli_fname)) {
           return true;
         }
       }
@@ -2822,7 +2829,7 @@
 }
 
 // Uncommit the pages in a specified region.
-void os::free_memory(char* addr, size_t bytes) {
+void os::free_memory(char* addr, size_t bytes, size_t alignment_hint) {
   if (madvise(addr, bytes, MADV_FREE) < 0) {
     debug_only(warning("MADV_FREE failed."));
     return;
@@ -3729,7 +3736,7 @@
 } SchedInfo;
 
 
-static SchedInfo tsLimits, iaLimits, rtLimits;
+static SchedInfo tsLimits, iaLimits, rtLimits, fxLimits;
 
 #ifdef ASSERT
 static int  ReadBackValidate = 1;
@@ -3740,6 +3747,8 @@
 static int  myCur       = 0;
 static bool priocntl_enable = false;
 
+static const int criticalPrio = 60; // FX/60 is critical thread class/priority on T4
+static int java_MaxPriority_to_os_priority = 0; // Saved mapping
 
 // Call the version of priocntl suitable for all supported versions
 // of Solaris. We need to call through this wrapper so that we can
@@ -3784,19 +3793,27 @@
   if (os::Solaris::T2_libthread() || UseBoundThreads) {
     // If ThreadPriorityPolicy is 1, switch tables
     if (ThreadPriorityPolicy == 1) {
-      for (i = 0 ; i < MaxPriority+1; i++)
+      for (i = 0 ; i < CriticalPriority+1; i++)
         os::java_to_os_priority[i] = prio_policy1[i];
     }
+    if (UseCriticalJavaThreadPriority) {
+      // MaxPriority always maps to the FX scheduling class and criticalPrio.
+      // See set_native_priority() and set_lwp_class_and_priority().
+      // Save the original MaxPriority mapping in case the attempt
+      // to use critical priority fails.
+      java_MaxPriority_to_os_priority = os::java_to_os_priority[MaxPriority];
+      // Set negative to distinguish from other priorities
+      os::java_to_os_priority[MaxPriority] = -criticalPrio;
+    }
   }
   // Not using Bound Threads, set to ThreadPolicy 1
   else {
-    for ( i = 0 ; i < MaxPriority+1; i++ ) {
+    for ( i = 0 ; i < CriticalPriority+1; i++ ) {
       os::java_to_os_priority[i] = prio_policy1[i];
     }
     return 0;
   }
 
-
   // Get IDs for a set of well-known scheduling classes.
   // TODO-FIXME: GETCLINFO returns the current # of classes in the
   // the system.  We should have a loop that iterates over the
@@ -3829,24 +3846,33 @@
   rtLimits.maxPrio = ((rtinfo_t*)ClassInfo.pc_clinfo)->rt_maxpri;
   rtLimits.minPrio = 0;
 
+  strcpy(ClassInfo.pc_clname, "FX");
+  ClassInfo.pc_cid = -1;
+  rslt = (*priocntl_ptr)(PC_VERSION, P_ALL, 0, PC_GETCID, (caddr_t)&ClassInfo);
+  if (rslt < 0) return errno;
+  assert(ClassInfo.pc_cid != -1, "cid for FX class is -1");
+  fxLimits.schedPolicy = ClassInfo.pc_cid;
+  fxLimits.maxPrio = ((fxinfo_t*)ClassInfo.pc_clinfo)->fx_maxupri;
+  fxLimits.minPrio = 0;
 
   // Query our "current" scheduling class.
-  // This will normally be IA,TS or, rarely, RT.
-  memset (&ParmInfo, 0, sizeof(ParmInfo));
+  // This will normally be IA, TS or, rarely, FX or RT.
+  memset(&ParmInfo, 0, sizeof(ParmInfo));
   ParmInfo.pc_cid = PC_CLNULL;
-  rslt = (*priocntl_ptr) (PC_VERSION, P_PID, P_MYID, PC_GETPARMS, (caddr_t)&ParmInfo );
-  if ( rslt < 0 ) return errno;
+  rslt = (*priocntl_ptr) (PC_VERSION, P_PID, P_MYID, PC_GETPARMS, (caddr_t)&ParmInfo);
+  if (rslt < 0) return errno;
   myClass = ParmInfo.pc_cid;
 
   // We now know our scheduling classId, get specific information
-  // the class.
+  // about the class.
   ClassInfo.pc_cid = myClass;
   ClassInfo.pc_clname[0] = 0;
-  rslt = (*priocntl_ptr) (PC_VERSION, (idtype)0, 0, PC_GETCLINFO, (caddr_t)&ClassInfo );
-  if ( rslt < 0 ) return errno;
-
-  if (ThreadPriorityVerbose)
-    tty->print_cr ("lwp_priocntl_init: Class=%d(%s)...", myClass, ClassInfo.pc_clname);
+  rslt = (*priocntl_ptr) (PC_VERSION, (idtype)0, 0, PC_GETCLINFO, (caddr_t)&ClassInfo);
+  if (rslt < 0) return errno;
+
+  if (ThreadPriorityVerbose) {
+    tty->print_cr("lwp_priocntl_init: Class=%d(%s)...", myClass, ClassInfo.pc_clname);
+  }
 
   memset(&ParmInfo, 0, sizeof(pcparms_t));
   ParmInfo.pc_cid = PC_CLNULL;
@@ -3866,6 +3892,11 @@
     myMin = tsLimits.minPrio;
     myMax = tsLimits.maxPrio;
     myMax = MIN2(myMax, (int)tsInfo->ts_uprilim);       // clamp - restrict
+  } else if (ParmInfo.pc_cid == fxLimits.schedPolicy) {
+    fxparms_t *fxInfo = (fxparms_t*)ParmInfo.pc_clparms;
+    myMin = fxLimits.minPrio;
+    myMax = fxLimits.maxPrio;
+    myMax = MIN2(myMax, (int)fxInfo->fx_uprilim);       // clamp - restrict
   } else {
     // No clue - punt
     if (ThreadPriorityVerbose)
@@ -3873,8 +3904,9 @@
     return EINVAL;      // no clue, punt
   }
 
-  if (ThreadPriorityVerbose)
-        tty->print_cr ("Thread priority Range: [%d..%d]\n", myMin, myMax);
+  if (ThreadPriorityVerbose) {
+    tty->print_cr ("Thread priority Range: [%d..%d]\n", myMin, myMax);
+  }
 
   priocntl_enable = true;  // Enable changing priorities
   return 0;
@@ -3883,6 +3915,7 @@
 #define IAPRI(x)        ((iaparms_t *)((x).pc_clparms))
 #define RTPRI(x)        ((rtparms_t *)((x).pc_clparms))
 #define TSPRI(x)        ((tsparms_t *)((x).pc_clparms))
+#define FXPRI(x)        ((fxparms_t *)((x).pc_clparms))
 
 
 // scale_to_lwp_priority
@@ -3901,13 +3934,13 @@
 }
 
 
-// set_lwp_priority
+// set_lwp_class_and_priority
 //
-// Set the priority of the lwp.  This call should only be made
-// when using bound threads (T2 threads are bound by default).
+// Set the class and priority of the lwp.  This call should only
+// be made when using bound threads (T2 threads are bound by default).
 //
-int     set_lwp_priority (int ThreadID, int lwpid, int newPrio )
-{
+int set_lwp_class_and_priority(int ThreadID, int lwpid,
+                               int newPrio, int new_class, bool scale) {
   int rslt;
   int Actual, Expected, prv;
   pcparms_t ParmInfo;                   // for GET-SET
@@ -3928,19 +3961,20 @@
     return EINVAL;
   }
 
-
   // If lwp hasn't started yet, just return
   // the _start routine will call us again.
   if ( lwpid <= 0 ) {
     if (ThreadPriorityVerbose) {
-      tty->print_cr ("deferring the set_lwp_priority of thread " INTPTR_FORMAT " to %d, lwpid not set",
+      tty->print_cr ("deferring the set_lwp_class_and_priority of thread "
+                     INTPTR_FORMAT " to %d, lwpid not set",
                      ThreadID, newPrio);
     }
     return 0;
   }
 
   if (ThreadPriorityVerbose) {
-    tty->print_cr ("set_lwp_priority(" INTPTR_FORMAT "@" INTPTR_FORMAT " %d) ",
+    tty->print_cr ("set_lwp_class_and_priority("
+                   INTPTR_FORMAT "@" INTPTR_FORMAT " %d) ",
                    ThreadID, lwpid, newPrio);
   }
 
@@ -3949,40 +3983,69 @@
   rslt = (*priocntl_ptr)(PC_VERSION, P_LWPID, lwpid, PC_GETPARMS, (caddr_t)&ParmInfo);
   if (rslt < 0) return errno;
 
-  if (ParmInfo.pc_cid == rtLimits.schedPolicy) {
+  int cur_class = ParmInfo.pc_cid;
+  ParmInfo.pc_cid = (id_t)new_class;
+
+  if (new_class == rtLimits.schedPolicy) {
     rtparms_t *rtInfo  = (rtparms_t*)ParmInfo.pc_clparms;
-    rtInfo->rt_pri     = scale_to_lwp_priority (rtLimits.minPrio, rtLimits.maxPrio, newPrio);
+    rtInfo->rt_pri     = scale ? scale_to_lwp_priority(rtLimits.minPrio,
+                                                       rtLimits.maxPrio, newPrio)
+                               : newPrio;
     rtInfo->rt_tqsecs  = RT_NOCHANGE;
     rtInfo->rt_tqnsecs = RT_NOCHANGE;
     if (ThreadPriorityVerbose) {
       tty->print_cr("RT: %d->%d\n", newPrio, rtInfo->rt_pri);
     }
-  } else if (ParmInfo.pc_cid == iaLimits.schedPolicy) {
-    iaparms_t *iaInfo  = (iaparms_t*)ParmInfo.pc_clparms;
-    int maxClamped     = MIN2(iaLimits.maxPrio, (int)iaInfo->ia_uprilim);
-    iaInfo->ia_upri    = scale_to_lwp_priority(iaLimits.minPrio, maxClamped, newPrio);
-    iaInfo->ia_uprilim = IA_NOCHANGE;
+  } else if (new_class == iaLimits.schedPolicy) {
+    iaparms_t* iaInfo  = (iaparms_t*)ParmInfo.pc_clparms;
+    int maxClamped     = MIN2(iaLimits.maxPrio,
+                              cur_class == new_class
+                                ? (int)iaInfo->ia_uprilim : iaLimits.maxPrio);
+    iaInfo->ia_upri    = scale ? scale_to_lwp_priority(iaLimits.minPrio,
+                                                       maxClamped, newPrio)
+                               : newPrio;
+    iaInfo->ia_uprilim = cur_class == new_class
+                           ? IA_NOCHANGE : (pri_t)iaLimits.maxPrio;
     iaInfo->ia_mode    = IA_NOCHANGE;
     if (ThreadPriorityVerbose) {
-      tty->print_cr ("IA: [%d...%d] %d->%d\n",
-               iaLimits.minPrio, maxClamped, newPrio, iaInfo->ia_upri);
+      tty->print_cr("IA: [%d...%d] %d->%d\n",
+                    iaLimits.minPrio, maxClamped, newPrio, iaInfo->ia_upri);
     }
-  } else if (ParmInfo.pc_cid == tsLimits.schedPolicy) {
-    tsparms_t *tsInfo  = (tsparms_t*)ParmInfo.pc_clparms;
-    int maxClamped     = MIN2(tsLimits.maxPrio, (int)tsInfo->ts_uprilim);
-    prv                = tsInfo->ts_upri;
-    tsInfo->ts_upri    = scale_to_lwp_priority(tsLimits.minPrio, maxClamped, newPrio);
-    tsInfo->ts_uprilim = IA_NOCHANGE;
+  } else if (new_class == tsLimits.schedPolicy) {
+    tsparms_t* tsInfo  = (tsparms_t*)ParmInfo.pc_clparms;
+    int maxClamped     = MIN2(tsLimits.maxPrio,
+                              cur_class == new_class
+                                ? (int)tsInfo->ts_uprilim : tsLimits.maxPrio);
+    tsInfo->ts_upri    = scale ? scale_to_lwp_priority(tsLimits.minPrio,
+                                                       maxClamped, newPrio)
+                               : newPrio;
+    tsInfo->ts_uprilim = cur_class == new_class
+                           ? TS_NOCHANGE : (pri_t)tsLimits.maxPrio;
     if (ThreadPriorityVerbose) {
-      tty->print_cr ("TS: %d [%d...%d] %d->%d\n",
-               prv, tsLimits.minPrio, maxClamped, newPrio, tsInfo->ts_upri);
+      tty->print_cr("TS: [%d...%d] %d->%d\n",
+                    tsLimits.minPrio, maxClamped, newPrio, tsInfo->ts_upri);
     }
-    if (prv == tsInfo->ts_upri) return 0;
+  } else if (new_class == fxLimits.schedPolicy) {
+    fxparms_t* fxInfo  = (fxparms_t*)ParmInfo.pc_clparms;
+    int maxClamped     = MIN2(fxLimits.maxPrio,
+                              cur_class == new_class
+                                ? (int)fxInfo->fx_uprilim : fxLimits.maxPrio);
+    fxInfo->fx_upri    = scale ? scale_to_lwp_priority(fxLimits.minPrio,
+                                                       maxClamped, newPrio)
+                               : newPrio;
+    fxInfo->fx_uprilim = cur_class == new_class
+                           ? FX_NOCHANGE : (pri_t)fxLimits.maxPrio;
+    fxInfo->fx_tqsecs  = FX_NOCHANGE;
+    fxInfo->fx_tqnsecs = FX_NOCHANGE;
+    if (ThreadPriorityVerbose) {
+      tty->print_cr("FX: [%d...%d] %d->%d\n",
+                    fxLimits.minPrio, maxClamped, newPrio, fxInfo->fx_upri);
+    }
   } else {
-    if ( ThreadPriorityVerbose ) {
-      tty->print_cr ("Unknown scheduling class\n");
+    if (ThreadPriorityVerbose) {
+      tty->print_cr("Unknown new scheduling class %d\n", new_class);
     }
-      return EINVAL;    // no clue, punt
+    return EINVAL;    // no clue, punt
   }
 
   rslt = (*priocntl_ptr)(PC_VERSION, P_LWPID, lwpid, PC_SETPARMS, (caddr_t)&ParmInfo);
@@ -4017,16 +4080,20 @@
   } else if (ParmInfo.pc_cid == tsLimits.schedPolicy) {
     Actual   = TSPRI(ReadBack)->ts_upri;
     Expected = TSPRI(ParmInfo)->ts_upri;
+  } else if (ParmInfo.pc_cid == fxLimits.schedPolicy) {
+    Actual   = FXPRI(ReadBack)->fx_upri;
+    Expected = FXPRI(ParmInfo)->fx_upri;
   } else {
-    if ( ThreadPriorityVerbose ) {
-      tty->print_cr("set_lwp_priority: unexpected class in readback: %d\n", ParmInfo.pc_cid);
+    if (ThreadPriorityVerbose) {
+      tty->print_cr("set_lwp_class_and_priority: unexpected class in readback: %d\n",
+                    ParmInfo.pc_cid);
     }
   }
 
   if (Actual != Expected) {
-    if ( ThreadPriorityVerbose ) {
-      tty->print_cr ("set_lwp_priority(%d %d) Class=%d: actual=%d vs expected=%d\n",
-             lwpid, newPrio, ReadBack.pc_cid, Actual, Expected);
+    if (ThreadPriorityVerbose) {
+      tty->print_cr ("set_lwp_class_and_priority(%d %d) Class=%d: actual=%d vs expected=%d\n",
+                     lwpid, newPrio, ReadBack.pc_cid, Actual, Expected);
     }
   }
 #endif
@@ -4034,8 +4101,6 @@
   return 0;
 }
 
-
-
 // Solaris only gives access to 128 real priorities at a time,
 // so we expand Java's ten to fill this range.  This would be better
 // if we dynamically adjusted relative priorities.
@@ -4056,8 +4121,7 @@
 // which do not explicitly alter their thread priorities.
 //
 
-
-int os::java_to_os_priority[MaxPriority + 1] = {
+int os::java_to_os_priority[CriticalPriority + 1] = {
   -99999,         // 0 Entry should never be used
 
   0,              // 1 MinPriority
@@ -4072,17 +4136,51 @@
   127,            // 8
   127,            // 9 NearMaxPriority
 
-  127             // 10 MaxPriority
+  127,            // 10 MaxPriority
+
+  -criticalPrio   // 11 CriticalPriority
 };
 
-
 OSReturn os::set_native_priority(Thread* thread, int newpri) {
+  OSThread* osthread = thread->osthread();
+
+  // Save requested priority in case the thread hasn't been started
+  osthread->set_native_priority(newpri);
+
+  // Check for critical priority request
+  bool fxcritical = false;
+  if (newpri == -criticalPrio) {
+    fxcritical = true;
+    newpri = criticalPrio;
+  }
+
   assert(newpri >= MinimumPriority && newpri <= MaximumPriority, "bad priority mapping");
-  if ( !UseThreadPriorities ) return OS_OK;
-  int status = thr_setprio(thread->osthread()->thread_id(), newpri);
-  if ( os::Solaris::T2_libthread() || (UseBoundThreads && thread->osthread()->is_vm_created()) )
-    status |= (set_lwp_priority (thread->osthread()->thread_id(),
-                    thread->osthread()->lwp_id(), newpri ));
+  if (!UseThreadPriorities) return OS_OK;
+
+  int status = 0;
+
+  if (!fxcritical) {
+    // Use thr_setprio only if we have a priority that thr_setprio understands
+    status = thr_setprio(thread->osthread()->thread_id(), newpri);
+  }
+
+  if (os::Solaris::T2_libthread() ||
+      (UseBoundThreads && osthread->is_vm_created())) {
+    int lwp_status =
+      set_lwp_class_and_priority(osthread->thread_id(),
+                                 osthread->lwp_id(),
+                                 newpri,
+                                 fxcritical ? fxLimits.schedPolicy : myClass,
+                                 !fxcritical);
+    if (lwp_status != 0 && fxcritical) {
+      // Try again, this time without changing the scheduling class
+      newpri = java_MaxPriority_to_os_priority;
+      lwp_status = set_lwp_class_and_priority(osthread->thread_id(),
+                                              osthread->lwp_id(),
+                                              newpri, myClass, false);
+    }
+    status |= lwp_status;
+  }
   return (status == 0) ? OS_OK : OS_ERR;
 }
 
@@ -6064,10 +6162,7 @@
  * is no need to track notifications.
  */
 
-#define NANOSECS_PER_SEC 1000000000
-#define NANOSECS_PER_MILLISEC 1000000
 #define MAX_SECS 100000000
-
 /*
  * This code is common to linux and solaris and will be moved to a
  * common place in dolphin.
@@ -6363,17 +6458,16 @@
   RESTARTABLE_RETURN_INT(::close(fd));
 }
 
-int os::recv(int fd, char *buf, int nBytes, int flags) {
-  INTERRUPTIBLE_RETURN_INT(::recv(fd, buf, nBytes, flags), os::Solaris::clear_interrupted);
-}
-
-
-int os::send(int fd, char *buf, int nBytes, int flags) {
-  INTERRUPTIBLE_RETURN_INT(::send(fd, buf, nBytes, flags), os::Solaris::clear_interrupted);
-}
-
-int os::raw_send(int fd, char *buf, int nBytes, int flags) {
-  RESTARTABLE_RETURN_INT(::send(fd, buf, nBytes, flags));
+int os::recv(int fd, char* buf, size_t nBytes, uint flags) {
+  INTERRUPTIBLE_RETURN_INT((int)::recv(fd, buf, nBytes, flags), os::Solaris::clear_interrupted);
+}
+
+int os::send(int fd, char* buf, size_t nBytes, uint flags) {
+  INTERRUPTIBLE_RETURN_INT((int)::send(fd, buf, nBytes, flags), os::Solaris::clear_interrupted);
+}
+
+int os::raw_send(int fd, char* buf, size_t nBytes, uint flags) {
+  RESTARTABLE_RETURN_INT((int)::send(fd, buf, nBytes, flags));
 }
 
 // As both poll and select can be interrupted by signals, we have to be
@@ -6408,19 +6502,19 @@
   }
 }
 
-int os::connect(int fd, struct sockaddr *him, int len) {
+int os::connect(int fd, struct sockaddr *him, socklen_t len) {
   int _result;
-  INTERRUPTIBLE_NORESTART(::connect(fd, him, len), _result,
+  INTERRUPTIBLE_NORESTART(::connect(fd, him, len), _result,\
                           os::Solaris::clear_interrupted);
 
   // Depending on when thread interruption is reset, _result could be
   // one of two values when errno == EINTR
 
   if (((_result == OS_INTRPT) || (_result == OS_ERR))
-                                        && (errno == EINTR)) {
+      && (errno == EINTR)) {
      /* restarting a connect() changes its errno semantics */
-     INTERRUPTIBLE(::connect(fd, him, len), _result,
-                     os::Solaris::clear_interrupted);
+     INTERRUPTIBLE(::connect(fd, him, len), _result,\
+                   os::Solaris::clear_interrupted);
      /* undo these changes */
      if (_result == OS_ERR) {
        if (errno == EALREADY) {
@@ -6434,43 +6528,38 @@
    return _result;
  }
 
-int os::accept(int fd, struct sockaddr *him, int *len) {
-  if (fd < 0)
-   return OS_ERR;
-  INTERRUPTIBLE_RETURN_INT((int)::accept(fd, him,\
-    (socklen_t*) len), os::Solaris::clear_interrupted);
- }
-
-int os::recvfrom(int fd, char *buf, int nBytes, int flags,
-                             sockaddr *from, int *fromlen) {
-   //%%note jvm_r11
-  INTERRUPTIBLE_RETURN_INT((int)::recvfrom(fd, buf, nBytes,\
-    flags, from, fromlen), os::Solaris::clear_interrupted);
-}
-
-int os::sendto(int fd, char *buf, int len, int flags,
-                           struct sockaddr *to, int tolen) {
-  //%%note jvm_r11
-  INTERRUPTIBLE_RETURN_INT((int)::sendto(fd, buf, len, flags,\
-    to, tolen), os::Solaris::clear_interrupted);
+int os::accept(int fd, struct sockaddr* him, socklen_t* len) {
+  if (fd < 0) {
+    return OS_ERR;
+  }
+  INTERRUPTIBLE_RETURN_INT((int)::accept(fd, him, len),\
+                           os::Solaris::clear_interrupted);
+}
+
+int os::recvfrom(int fd, char* buf, size_t nBytes, uint flags,
+                 sockaddr* from, socklen_t* fromlen) {
+  INTERRUPTIBLE_RETURN_INT((int)::recvfrom(fd, buf, nBytes, flags, from, fromlen),\
+                           os::Solaris::clear_interrupted);
+}
+
+int os::sendto(int fd, char* buf, size_t len, uint flags,
+               struct sockaddr* to, socklen_t tolen) {
+  INTERRUPTIBLE_RETURN_INT((int)::sendto(fd, buf, len, flags, to, tolen),\
+                           os::Solaris::clear_interrupted);
 }
 
 int os::socket_available(int fd, jint *pbytes) {
-   if (fd < 0)
-     return OS_OK;
-
-   int ret;
-
-   RESTARTABLE(::ioctl(fd, FIONREAD, pbytes), ret);
-
-   //%% note ioctl can return 0 when successful, JVM_SocketAvailable
-   // is expected to return 0 on failure and 1 on success to the jdk.
-
-   return (ret == OS_ERR) ? 0 : 1;
-}
-
-
-int os::bind(int fd, struct sockaddr *him, int len) {
+  if (fd < 0) {
+    return OS_OK;
+  }
+  int ret;
+  RESTARTABLE(::ioctl(fd, FIONREAD, pbytes), ret);
+  // Note: ioctl can return 0 when successful, whereas JVM_SocketAvailable
+  // is expected to return 0 on failure and 1 on success to the JDK.
+  return (ret == OS_ERR) ? 0 : 1;
+}
+
+int os::bind(int fd, struct sockaddr* him, socklen_t len) {
    INTERRUPTIBLE_RETURN_INT_NORESTART(::bind(fd, him, len),\
-     os::Solaris::clear_interrupted);
-}
+                                      os::Solaris::clear_interrupted);
+}
--- a/src/os/solaris/vm/os_solaris.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/solaris/vm/os_solaris.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -243,24 +243,25 @@
   return ::shutdown(fd, howto);
 }
 
-inline int os::get_sock_name(int fd, struct sockaddr *him, int *len){
-  return ::getsockname(fd, him, (socklen_t*) len);
+inline int os::get_sock_name(int fd, struct sockaddr* him, socklen_t* len){
+  return ::getsockname(fd, him, len);
 }
 
 inline int os::get_host_name(char* name, int namelen){
   return ::gethostname(name, namelen);
 }
 
-inline struct hostent*  os::get_host_by_name(char* name) {
+inline struct hostent* os::get_host_by_name(char* name) {
   return ::gethostbyname(name);
 }
+
 inline int os::get_sock_opt(int fd, int level, int optname,
-                             char *optval, int* optlen){
-  return ::getsockopt(fd, level, optname, optval, (socklen_t*) optlen);
+                            char* optval, socklen_t* optlen) {
+  return ::getsockopt(fd, level, optname, optval, optlen);
 }
 
 inline int os::set_sock_opt(int fd, int level, int optname,
-                             const char *optval, int optlen){
+                            const char *optval, socklen_t optlen) {
   return ::setsockopt(fd, level, optname, optval, optlen);
 }
 #endif // OS_SOLARIS_VM_OS_SOLARIS_INLINE_HPP
--- a/src/os/windows/vm/decoder_windows.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/windows/vm/decoder_windows.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,22 +24,24 @@
 
 #include "precompiled.hpp"
 #include "prims/jvm.h"
-#include "runtime/os.hpp"
-#include "utilities/decoder.hpp"
+#include "decoder_windows.hpp"
+
+WindowsDecoder::WindowsDecoder() {
+  _dbghelp_handle = NULL;
+  _can_decode_in_vm = false;
+  _pfnSymGetSymFromAddr64 = NULL;
+  _pfnUndecorateSymbolName = NULL;
 
-HMODULE                   Decoder::_dbghelp_handle = NULL;
-bool                      Decoder::_can_decode_in_vm = false;
-pfn_SymGetSymFromAddr64   Decoder::_pfnSymGetSymFromAddr64 = NULL;
-pfn_UndecorateSymbolName  Decoder::_pfnUndecorateSymbolName = NULL;
+  _decoder_status = no_error;
+  initialize();
+}
 
-void Decoder::initialize() {
-  if (!_initialized) {
-    _initialized = true;
-
-    HINSTANCE handle = os::win32::load_Windows_dll("dbghelp.dll", NULL, 0);
+void WindowsDecoder::initialize() {
+  if (!has_error() && _dbghelp_handle == NULL) {
+    HMODULE handle = ::LoadLibrary("dbghelp.dll");
     if (!handle) {
       _decoder_status = helper_not_found;
-        return;
+      return;
     }
 
     _dbghelp_handle = handle;
@@ -70,32 +72,29 @@
 
      // find out if jvm.dll contains private symbols, by decoding
      // current function and comparing the result
-     address addr = (address)Decoder::initialize;
+     address addr = (address)Decoder::decode;
      char buf[MAX_PATH];
-     if (decode(addr, buf, sizeof(buf), NULL) == no_error) {
-       _can_decode_in_vm = !strcmp(buf, "Decoder::initialize");
+     if (decode(addr, buf, sizeof(buf), NULL)) {
+       _can_decode_in_vm = !strcmp(buf, "Decoder::decode");
      }
   }
 }
 
-void Decoder::uninitialize() {
-  assert(_initialized, "Decoder not yet initialized");
+void WindowsDecoder::uninitialize() {
   _pfnSymGetSymFromAddr64 = NULL;
   _pfnUndecorateSymbolName = NULL;
   if (_dbghelp_handle != NULL) {
     ::FreeLibrary(_dbghelp_handle);
   }
-  _initialized = false;
+  _dbghelp_handle = NULL;
 }
 
-bool Decoder::can_decode_C_frame_in_vm() {
-  initialize();
-  return  _can_decode_in_vm;
+bool WindowsDecoder::can_decode_C_frame_in_vm() const {
+  return  (!has_error() && _can_decode_in_vm);
 }
 
 
-Decoder::decoder_status Decoder::decode(address addr, char *buf, int buflen, int *offset) {
-  assert(_initialized, "Decoder not yet initialized");
+bool WindowsDecoder::decode(address addr, char *buf, int buflen, int* offset, const char* modulepath)  {
   if (_pfnSymGetSymFromAddr64 != NULL) {
     PIMAGEHLP_SYMBOL64 pSymbol;
     char symbolInfo[MAX_PATH + sizeof(IMAGEHLP_SYMBOL64)];
@@ -105,19 +104,20 @@
     DWORD64 displacement;
     if (_pfnSymGetSymFromAddr64(::GetCurrentProcess(), (DWORD64)addr, &displacement, pSymbol)) {
       if (buf != NULL) {
-        if (!demangle(pSymbol->Name, buf, buflen)) {
+        if (!demangle(pSymbol->Name, buf, buflen)) {  // demangling failed: fall back to the decorated name
           jio_snprintf(buf, buflen, "%s", pSymbol->Name);
         }
       }
-      if (offset != NULL) *offset = (int)displacement;
-      return no_error;
+      if(offset != NULL) *offset = (int)displacement;
+      return true;
     }
   }
-  return helper_not_found;
+  if (buf != NULL && buflen > 0) buf[0] = '\0';
+  if (offset != NULL) *offset = -1;
+  return false;
 }
 
-bool Decoder::demangle(const char* symbol, char *buf, int buflen) {
-  assert(_initialized, "Decoder not yet initialized");
+bool WindowsDecoder::demangle(const char* symbol, char *buf, int buflen) {
   return _pfnUndecorateSymbolName != NULL &&
          _pfnUndecorateSymbolName(symbol, buf, buflen, UNDNAME_COMPLETE);
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os/windows/vm/decoder_windows.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_WINDOWS_VM_DECODER_WINDOWS_HPP
+#define OS_WINDOWS_VM_DECODER_WINDOWS_HPP
+
+#include <windows.h>
+#include <imagehlp.h>
+
+#include "utilities/decoder.hpp"
+
+// functions needed for decoding symbols
+typedef DWORD (WINAPI *pfn_SymSetOptions)(DWORD);
+typedef BOOL  (WINAPI *pfn_SymInitialize)(HANDLE, PCTSTR, BOOL);
+typedef BOOL  (WINAPI *pfn_SymGetSymFromAddr64)(HANDLE, DWORD64, PDWORD64, PIMAGEHLP_SYMBOL64);
+typedef DWORD (WINAPI *pfn_UndecorateSymbolName)(const char*, char*, DWORD, DWORD);
+
+class WindowsDecoder : public AbstractDecoder {
+
+public:
+  WindowsDecoder();
+  ~WindowsDecoder() { uninitialize(); };
+
+  bool can_decode_C_frame_in_vm() const;
+  bool demangle(const char* symbol, char *buf, int buflen);
+  bool decode(address addr, char *buf, int buflen, int* offset, const char* modulepath = NULL);
+
+private:
+  void initialize();
+  void uninitialize();
+
+private:
+  HMODULE                   _dbghelp_handle;
+  bool                      _can_decode_in_vm;
+  pfn_SymGetSymFromAddr64   _pfnSymGetSymFromAddr64;
+  pfn_UndecorateSymbolName  _pfnUndecorateSymbolName;
+};
+
+#endif // OS_WINDOWS_VM_DECODER_WINDOWS_HPP
+
--- a/src/os/windows/vm/jvm_windows.h	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/windows/vm/jvm_windows.h	Mon Feb 27 15:06:36 2012 -0800
@@ -22,6 +22,9 @@
  *
  */
 
+#ifndef OS_WINDOWS_VM_JVM_WINDOWS_H
+#define OS_WINDOWS_VM_JVM_WINDOWS_H
+
 #ifndef _JAVASOFT_JVM_MD_H_
 #define _JAVASOFT_JVM_MD_H_
 
@@ -54,9 +57,9 @@
 #include <Psapi.h>
 #endif
 
-
+#include <Tlhelp32.h>
 
-#include <Tlhelp32.h>
+typedef unsigned int socklen_t;
 
 // #include "jni.h"
 
@@ -129,3 +132,5 @@
 #define SHUTDOWN2_SIGNAL SIGTERM
 
 #endif /* !_JAVASOFT_JVM_MD_H_ */
+
+#endif // OS_WINDOWS_VM_JVM_WINDOWS_H
--- a/src/os/windows/vm/os_windows.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os/windows/vm/os_windows.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -132,7 +132,6 @@
 // save DLL module handle, used by GetModuleFileName
 
 HINSTANCE vm_lib_handle;
-static int getLastErrorString(char *buf, size_t len);
 
 BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID reserved) {
   switch (reason) {
@@ -408,6 +407,7 @@
     }
   }
 
+
   if (UseVectoredExceptions) {
     // If we are using vectored exception we don't need to set a SEH
     thread->run();
@@ -820,17 +820,15 @@
   }
 }
 
-#define NANOS_PER_SEC         CONST64(1000000000)
-#define NANOS_PER_MILLISEC    1000000
 jlong os::javaTimeNanos() {
   if (!has_performance_count) {
-    return javaTimeMillis() * NANOS_PER_MILLISEC; // the best we can do.
+    return javaTimeMillis() * NANOSECS_PER_MILLISEC; // the best we can do.
   } else {
     LARGE_INTEGER current_count;
     QueryPerformanceCounter(&current_count);
     double current = as_long(current_count);
     double freq = performance_frequency;
-    jlong time = (jlong)((current/freq) * NANOS_PER_SEC);
+    jlong time = (jlong)((current/freq) * NANOSECS_PER_SEC);
     return time;
   }
 }
@@ -846,15 +844,15 @@
     info_ptr->may_skip_forward = true;
   } else {
     jlong freq = performance_frequency;
-    if (freq < NANOS_PER_SEC) {
+    if (freq < NANOSECS_PER_SEC) {
       // the performance counter is 64 bits and we will
       // be multiplying it -- so no wrap in 64 bits
       info_ptr->max_value = ALL_64_BITS;
-    } else if (freq > NANOS_PER_SEC) {
+    } else if (freq > NANOSECS_PER_SEC) {
       // use the max value the counter can reach to
       // determine the max value which could be returned
       julong max_counter = (julong)ALL_64_BITS;
-      info_ptr->max_value = (jlong)(max_counter / (freq / NANOS_PER_SEC));
+      info_ptr->max_value = (jlong)(max_counter / (freq / NANOSECS_PER_SEC));
     } else {
       // the performance counter is 64 bits and we will
       // be using it directly -- so no wrap in 64 bits
@@ -1393,7 +1391,7 @@
 
 bool os::dll_address_to_function_name(address addr, char *buf,
                                       int buflen, int *offset) {
-  if (Decoder::decode(addr, buf, buflen, offset) == Decoder::no_error) {
+  if (Decoder::decode(addr, buf, buflen, offset)) {
     return true;
   }
   if (offset != NULL)  *offset  = -1;
@@ -1453,7 +1451,7 @@
     return result;
   }
 
-  long errcode = GetLastError();
+  DWORD errcode = GetLastError();
   if (errcode == ERROR_MOD_NOT_FOUND) {
     strncpy(ebuf, "Can't find dependent libraries", ebuflen-1);
     ebuf[ebuflen-1]='\0';
@@ -1464,11 +1462,11 @@
   // If we can read dll-info and find that dll was built
   // for an architecture other than Hotspot is running in
   // - then print to buffer "DLL was built for a different architecture"
-  // else call getLastErrorString to obtain system error message
+  // else call os::lasterror to obtain system error message
 
   // Read system error message into ebuf
   // It may or may not be overwritten below (in the for loop and just above)
-  getLastErrorString(ebuf, (size_t) ebuflen);
+  lasterror(ebuf, (size_t) ebuflen);
   ebuf[ebuflen-1]='\0';
   int file_descriptor=::open(name, O_RDONLY | O_BINARY, 0);
   if (file_descriptor<0)
@@ -1501,7 +1499,7 @@
   ::close(file_descriptor);
   if (failed_to_get_lib_arch)
   {
-    // file i/o error - report getLastErrorString(...) msg
+    // file i/o error - report os::lasterror(...) msg
     return NULL;
   }
 
@@ -1544,7 +1542,7 @@
     "Didn't find runing architecture code in arch_array");
 
   // If the architure is right
-  // but some other error took place - report getLastErrorString(...) msg
+  // but some other error took place - report os::lasterror(...) msg
   if (lib_arch == running_arch)
   {
     return NULL;
@@ -1776,12 +1774,12 @@
 // This method is a copy of JDK's sysGetLastErrorString
 // from src/windows/hpi/src/system_md.c
 
-size_t os::lasterror(char *buf, size_t len) {
-  long errval;
+size_t os::lasterror(char* buf, size_t len) {
+  DWORD errval;
 
   if ((errval = GetLastError()) != 0) {
-      /* DOS error */
-    int n = (int)FormatMessage(
+    // DOS error
+    size_t n = (size_t)FormatMessage(
           FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS,
           NULL,
           errval,
@@ -1790,7 +1788,7 @@
           (DWORD)len,
           NULL);
     if (n > 3) {
-      /* Drop final '.', CR, LF */
+      // Drop final '.', CR, LF
       if (buf[n - 1] == '\n') n--;
       if (buf[n - 1] == '\r') n--;
       if (buf[n - 1] == '.') n--;
@@ -1800,17 +1798,25 @@
   }
 
   if (errno != 0) {
-    /* C runtime error that has no corresponding DOS error code */
-    const char *s = strerror(errno);
+    // C runtime error that has no corresponding DOS error code
+    const char* s = strerror(errno);
     size_t n = strlen(s);
     if (n >= len) n = len - 1;
     strncpy(buf, s, n);
     buf[n] = '\0';
     return n;
   }
+
   return 0;
 }
 
+int os::get_last_error() {
+  DWORD error = GetLastError();
+  if (error == 0)
+    error = errno;
+  return (int)error;
+}
+
 // sun.misc.Signal
 // NOTE that this is a workaround for an apparent kernel bug where if
 // a signal handler for SIGBREAK is installed then that signal handler
@@ -2082,10 +2088,11 @@
 #elif _M_AMD64
   PCONTEXT ctx = exceptionInfo->ContextRecord;
   address pc = (address)ctx->Rip;
-  NOT_PRODUCT(Events::log("idiv overflow exception at " INTPTR_FORMAT , pc));
-  //assert(pc[0] == 0xF7 || (pc[1] == 0xF7 && (pc[0] == 0x41 || pc[0] == 0x49)), "not an idiv opcode");
-  //assert((pc[1] & ~0x7) == 0xF8, "cannot handle non-register operands");
-  //assert((long)ctx->Rax == (long)min_jint || pc[0] == 0x49, "unexpected idiv exception");
+#ifndef GRAAL
+  assert(pc[0] == 0xF7, "not an idiv opcode");
+  assert((pc[1] & ~0x7) == 0xF8, "cannot handle non-register operands");
+  assert(ctx->Rax == min_jint, "unexpected idiv exception");
+#endif
   // set correct result values and continue after idiv instruction
   ctx->Rip = (DWORD)pc + 2;        // idiv reg, reg  is 2 bytes
   ctx->Rax = (DWORD)min_jint;      // result
@@ -2094,7 +2101,6 @@
 #else
   PCONTEXT ctx = exceptionInfo->ContextRecord;
   address pc = (address)ctx->Eip;
-  NOT_PRODUCT(Events::log("idiv overflow exception at " INTPTR_FORMAT , pc));
   assert(pc[0] == 0xF7, "not an idiv opcode");
   assert((pc[1] & ~0x7) == 0xF8, "cannot handle non-register operands");
   assert(ctx->Eax == min_jint, "unexpected idiv exception");
@@ -3131,7 +3137,7 @@
 }
 
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
-void os::free_memory(char *addr, size_t bytes)         { }
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint)    { }
 void os::numa_make_global(char *addr, size_t bytes)    { }
 void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint)    { }
 bool os::numa_topology_changed()                       { return false; }
@@ -3290,7 +3296,7 @@
 // so we compress Java's ten down to seven.  It would be better
 // if we dynamically adjusted relative priorities.
 
-int os::java_to_os_priority[MaxPriority + 1] = {
+int os::java_to_os_priority[CriticalPriority + 1] = {
   THREAD_PRIORITY_IDLE,                         // 0  Entry should never be used
   THREAD_PRIORITY_LOWEST,                       // 1  MinPriority
   THREAD_PRIORITY_LOWEST,                       // 2
@@ -3301,10 +3307,11 @@
   THREAD_PRIORITY_ABOVE_NORMAL,                 // 7
   THREAD_PRIORITY_ABOVE_NORMAL,                 // 8
   THREAD_PRIORITY_HIGHEST,                      // 9  NearMaxPriority
-  THREAD_PRIORITY_HIGHEST                       // 10 MaxPriority
+  THREAD_PRIORITY_HIGHEST,                      // 10 MaxPriority
+  THREAD_PRIORITY_HIGHEST                       // 11 CriticalPriority
 };
 
-int prio_policy1[MaxPriority + 1] = {
+int prio_policy1[CriticalPriority + 1] = {
   THREAD_PRIORITY_IDLE,                         // 0  Entry should never be used
   THREAD_PRIORITY_LOWEST,                       // 1  MinPriority
   THREAD_PRIORITY_LOWEST,                       // 2
@@ -3315,17 +3322,21 @@
   THREAD_PRIORITY_ABOVE_NORMAL,                 // 7
   THREAD_PRIORITY_HIGHEST,                      // 8
   THREAD_PRIORITY_HIGHEST,                      // 9  NearMaxPriority
-  THREAD_PRIORITY_TIME_CRITICAL                 // 10 MaxPriority
+  THREAD_PRIORITY_TIME_CRITICAL,                // 10 MaxPriority
+  THREAD_PRIORITY_TIME_CRITICAL                 // 11 CriticalPriority
 };
 
 static int prio_init() {
   // If ThreadPriorityPolicy is 1, switch tables
   if (ThreadPriorityPolicy == 1) {
     int i;
-    for (i = 0; i < MaxPriority + 1; i++) {
+    for (i = 0; i < CriticalPriority + 1; i++) {
       os::java_to_os_priority[i] = prio_policy1[i];
     }
   }
+  if (UseCriticalJavaThreadPriority) {
+    os::java_to_os_priority[MaxPriority] = os::java_to_os_priority[CriticalPriority] ;
+  }
   return 0;
 }
 
@@ -4747,7 +4758,7 @@
           fatal("corrupted C heap");
         }
       }
-      int err = GetLastError();
+      DWORD err = GetLastError();
       if (err != ERROR_NO_MORE_ITEMS && err != ERROR_CALL_NOT_IMPLEMENTED) {
         fatal(err_msg("heap walk aborted with error %d", err));
       }
@@ -4779,45 +4790,6 @@
   return EXCEPTION_CONTINUE_SEARCH;
 }
 
-static int getLastErrorString(char *buf, size_t len)
-{
-    long errval;
-
-    if ((errval = GetLastError()) != 0)
-    {
-      /* DOS error */
-      size_t n = (size_t)FormatMessage(
-            FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS,
-            NULL,
-            errval,
-            0,
-            buf,
-            (DWORD)len,
-            NULL);
-      if (n > 3) {
-        /* Drop final '.', CR, LF */
-        if (buf[n - 1] == '\n') n--;
-        if (buf[n - 1] == '\r') n--;
-        if (buf[n - 1] == '.') n--;
-        buf[n] = '\0';
-      }
-      return (int)n;
-    }
-
-    if (errno != 0)
-    {
-      /* C runtime error that has no corresponding DOS error code */
-      const char *s = strerror(errno);
-      size_t n = strlen(s);
-      if (n >= len) n = len - 1;
-      strncpy(buf, s, n);
-      buf[n] = '\0';
-      return (int)n;
-    }
-    return 0;
-}
-
-
 // We don't build a headless jre for Windows
 bool os::is_headless_jre() { return false; }
 
@@ -4850,7 +4822,7 @@
   ::mutexUnlock(&sockFnTableMutex);
 }
 
-struct hostent*  os::get_host_by_name(char* name) {
+struct hostent* os::get_host_by_name(char* name) {
   if (!sock_initialized) {
     initSock();
   }
@@ -4881,39 +4853,39 @@
   return 0;
 }
 
-int os::connect(int fd, struct sockaddr *him, int len) {
+int os::connect(int fd, struct sockaddr* him, socklen_t len) {
   ShouldNotReachHere();
   return 0;
 }
 
-int os::accept(int fd, struct sockaddr *him, int *len) {
+int os::accept(int fd, struct sockaddr* him, socklen_t* len) {
   ShouldNotReachHere();
   return 0;
 }
 
-int os::sendto(int fd, char *buf, int len, int flags,
-                        struct sockaddr *to, int tolen) {
+int os::sendto(int fd, char* buf, size_t len, uint flags,
+               struct sockaddr* to, socklen_t tolen) {
   ShouldNotReachHere();
   return 0;
 }
 
-int os::recvfrom(int fd, char *buf, int nBytes, int flags,
-                         sockaddr *from, int *fromlen) {
+int os::recvfrom(int fd, char *buf, size_t nBytes, uint flags,
+                 sockaddr* from, socklen_t* fromlen) {
   ShouldNotReachHere();
   return 0;
 }
 
-int os::recv(int fd, char *buf, int nBytes, int flags) {
+int os::recv(int fd, char* buf, size_t nBytes, uint flags) {
   ShouldNotReachHere();
   return 0;
 }
 
-int os::send(int fd, char *buf, int nBytes, int flags) {
+int os::send(int fd, char* buf, size_t nBytes, uint flags) {
   ShouldNotReachHere();
   return 0;
 }
 
-int os::raw_send(int fd, char *buf, int nBytes, int flags) {
+int os::raw_send(int fd, char* buf, size_t nBytes, uint flags) {
   ShouldNotReachHere();
   return 0;
 }
@@ -4933,24 +4905,24 @@
   return 0;
 }
 
-int os::bind(int fd, struct sockaddr *him, int len) {
+int os::bind(int fd, struct sockaddr* him, socklen_t len) {
   ShouldNotReachHere();
   return 0;
 }
 
-int os::get_sock_name(int fd, struct sockaddr *him, int *len) {
+int os::get_sock_name(int fd, struct sockaddr* him, socklen_t* len) {
   ShouldNotReachHere();
   return 0;
 }
 
 int os::get_sock_opt(int fd, int level, int optname,
-                             char *optval, int* optlen) {
+                     char* optval, socklen_t* optlen) {
   ShouldNotReachHere();
   return 0;
 }
 
 int os::set_sock_opt(int fd, int level, int optname,
-                             const char *optval, int optlen) {
+                     const char* optval, socklen_t optlen) {
   ShouldNotReachHere();
   return 0;
 }
@@ -5364,4 +5336,3 @@
 }
 
 #endif
-
--- a/src/os_cpu/bsd_x86/vm/bsd_x86_32.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/bsd_x86/vm/bsd_x86_32.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -24,137 +24,3 @@
 
 // X86 Bsd Architecture Description File
 
-//----------OS-DEPENDENT ENCODING BLOCK-----------------------------------------------------
-// This block specifies the encoding classes used by the compiler to output
-// byte streams.  Encoding classes generate functions which are called by
-// Machine Instruction Nodes in order to generate the bit encoding of the
-// instruction.  Operands specify their base encoding interface with the
-// interface keyword.  There are currently supported four interfaces,
-// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
-// operand to generate a function which returns its register number when
-// queried.   CONST_INTER causes an operand to generate a function which
-// returns the value of the constant when queried.  MEMORY_INTER causes an
-// operand to generate four functions which return the Base Register, the
-// Index Register, the Scale Value, and the Offset Value of the operand when
-// queried.  COND_INTER causes an operand to generate six functions which
-// return the encoding code (ie - encoding bits for the instruction)
-// associated with each basic boolean condition for a conditional instruction.
-// Instructions specify two basic values for encoding.  They use the
-// ins_encode keyword to specify their encoding class (which must be one of
-// the class names specified in the encoding block), and they use the
-// opcode keyword to specify, in order, their primary, secondary, and
-// tertiary opcode.  Only the opcode sections which a particular instruction
-// needs for encoding need to be specified.
-encode %{
-  // Build emit functions for each basic byte or larger field in the intel
-  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
-  // code in the enc_class source block.  Emit functions will live in the
-  // main source block for now.  In future, we can generalize this by
-  // adding a syntax that specifies the sizes of fields in an order,
-  // so that the adlc can build the emit functions automagically
-
-  enc_class bsd_tlsencode (eRegP dst) %{
-    Register dstReg = as_Register($dst$$reg);
-    MacroAssembler* masm = new MacroAssembler(&cbuf);
-      masm->get_thread(dstReg);
-  %}
-
-  enc_class bsd_breakpoint  %{
-    MacroAssembler* masm = new MacroAssembler(&cbuf);
-    masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-  %}
-
-  enc_class call_epilog %{
-    if( VerifyStackAtCalls ) {
-      // Check that stack depth is unchanged: find majik cookie on stack
-      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP,-3*VMRegImpl::slots_per_word));
-      if(framesize >= 128) {
-        emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood
-        emit_d8(cbuf,0xBC);
-        emit_d8(cbuf,0x24);
-        emit_d32(cbuf,framesize); // Find majik cookie from ESP
-        emit_d32(cbuf, 0xbadb100d);
-      }
-      else {
-        emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood
-        emit_d8(cbuf,0x7C);
-        emit_d8(cbuf,0x24);
-        emit_d8(cbuf,framesize); // Find majik cookie from ESP
-        emit_d32(cbuf, 0xbadb100d);
-      }
-      // jmp EQ around INT3
-      // QQQ TODO
-      const int jump_around = 5; // size of call to breakpoint, 1 for CC
-      emit_opcode(cbuf,0x74);
-      emit_d8(cbuf, jump_around);
-      // QQQ temporary
-      emit_break(cbuf);
-      // Die if stack mismatch
-      // emit_opcode(cbuf,0xCC);
-    }
-  %}
-
-%}
-
-// INSTRUCTIONS -- Platform dependent
-
-//----------OS and Locking Instructions----------------------------------------
-
-// This name is KNOWN by the ADLC and cannot be changed.
-// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
-// for this guy.
-instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
-  match(Set dst (ThreadLocal));
-  effect(DEF dst, KILL cr);
-
-  format %{ "MOV    $dst, Thread::current()" %}
-  ins_encode( bsd_tlsencode(dst) );
-  ins_pipe( ialu_reg_fat );
-%}
-
-instruct TLS(eRegP dst) %{
-  match(Set dst (ThreadLocal));
-
-  expand %{
-    tlsLoadP(dst);
-  %}
-%}
-
-// Die now
-instruct ShouldNotReachHere( )
-%{
-  match(Halt);
-
-  // Use the following format syntax
-  format %{ "INT3   ; ShouldNotReachHere" %}
-  // QQQ TODO for now call breakpoint
-  // opcode(0xCC);
-  // ins_encode(Opc);
-  ins_encode(bsd_breakpoint);
-  ins_pipe( pipe_slow );
-%}
-
-
-
-// Platform dependent source
-
-source %{
-
-// emit an interrupt that is caught by the debugger
-void emit_break(CodeBuffer &cbuf) {
-
-  // Debugger doesn't really catch this but best we can do so far QQQ
-  MacroAssembler* masm = new MacroAssembler(&cbuf);
-  masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-}
-
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  emit_break(cbuf);
-}
-
-
-uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
-  return 5;
-}
-
-%}
--- a/src/os_cpu/bsd_x86/vm/bsd_x86_64.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/bsd_x86/vm/bsd_x86_64.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -55,8 +55,7 @@
   // adding a syntax that specifies the sizes of fields in an order,
   // so that the adlc can build the emit functions automagically
 
-  enc_class Java_To_Runtime(method meth)
-  %{
+  enc_class Java_To_Runtime(method meth) %{
     // No relocation needed
 
     // movq r10, <meth>
@@ -70,104 +69,15 @@
     emit_opcode(cbuf, 0xD0 | (R10_enc - 8));
   %}
 
-  enc_class bsd_breakpoint
-  %{
-    MacroAssembler* masm = new MacroAssembler(&cbuf);
-    masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-  %}
-
-  enc_class call_epilog
-  %{
-    if (VerifyStackAtCalls) {
-      // Check that stack depth is unchanged: find majik cookie on stack
-      int framesize =
-        ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
-      if (framesize) {
-        if (framesize < 0x80) {
-          emit_opcode(cbuf, Assembler::REX_W);
-          emit_opcode(cbuf, 0x81); // cmpq [rsp+0],0xbadb1ood
-          emit_d8(cbuf, 0x7C);
-          emit_d8(cbuf, 0x24);
-          emit_d8(cbuf, framesize); // Find majik cookie from ESP
-          emit_d32(cbuf, 0xbadb100d);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_W);
-          emit_opcode(cbuf, 0x81); // cmpq [rsp+0],0xbadb1ood
-          emit_d8(cbuf, 0xBC);
-          emit_d8(cbuf, 0x24);
-          emit_d32(cbuf, framesize); // Find majik cookie from ESP
-          emit_d32(cbuf, 0xbadb100d);
-        }
-      }
-      // jmp EQ around INT3
-      // QQQ TODO
-      const int jump_around = 5; // size of call to breakpoint, 1 for CC
-      emit_opcode(cbuf, 0x74);
-      emit_d8(cbuf, jump_around);
-      // QQQ temporary
-      emit_break(cbuf);
-      // Die if stack mismatch
-      // emit_opcode(cbuf,0xCC);
-    }
-  %}
-
-%}
-
-// INSTRUCTIONS -- Platform dependent
-
-//----------OS and Locking Instructions----------------------------------------
-
-// This name is KNOWN by the ADLC and cannot be changed.
-// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
-// for this guy.
-instruct tlsLoadP(r15_RegP dst)
-%{
-  match(Set dst (ThreadLocal));
-  effect(DEF dst);
-
-  size(0);
-  format %{ "# TLS is in R15" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe(ialu_reg_reg);
-%}
-
-// Die now
-instruct ShouldNotReachHere()
-%{
-  match(Halt);
-
-  // Use the following format syntax
-  format %{ "int3\t# ShouldNotReachHere" %}
-  // QQQ TODO for now call breakpoint
-  // opcode(0xCC);
-  // ins_encode(Opc);
-  ins_encode(bsd_breakpoint);
-  ins_pipe(pipe_slow);
 %}
 
 
 // Platform dependent source
 
-source
-%{
+source %{
 
 int MachCallRuntimeNode::ret_addr_offset() {
   return 13; // movq r10,#addr; callq (r10)
 }
 
-// emit an interrupt that is caught by the debugger
-void emit_break(CodeBuffer& cbuf) {
-  // Debugger doesn't really catch this but best we can do so far QQQ
-  MacroAssembler* masm = new MacroAssembler(&cbuf);
-  masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-}
-
-void MachBreakpointNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
-  emit_break(cbuf);
-}
-
-uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
-  return 5;
-}
-
 %}
--- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -362,7 +362,7 @@
 }
 
 intptr_t* _get_previous_fp() {
-#if defined(SPARC_WORKS) || defined(__clang__)
+#if defined(SPARC_WORKS) || defined(__clang__) || defined(__llvm__)
   register intptr_t **ebp;
   __asm__("mov %%"SPELL_REG_FP", %0":"=r"(ebp));
 #else
--- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -28,6 +28,8 @@
   static void setup_fpu();
   static bool supports_sse();
 
+  static jlong rdtsc();
+
   static bool is_allocatable(size_t bytes);
 
   // Used to register dynamic code cache area with the OS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_BSD_X86_VM_OS_BSD_X86_INLINE_HPP
+#define OS_CPU_BSD_X86_VM_OS_BSD_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+// Reads the CPU time-stamp counter via RDTSC. See http://www.technovelty.org/code/c/reading-rdtsc.html for details
+inline jlong os::rdtsc() {
+#ifndef AMD64
+  // Non-AMD64 build: the 64 bit result is in edx:eax; the "=A" constraint binds that pair to res
+  uint64_t res;
+  __asm__ __volatile__ ("rdtsc" : "=A" (res));
+  return (jlong)res;
+#else
+  uint64_t res;
+  uint32_t ts1, ts2;  // ts1 receives eax ("=a", low half); ts2 receives edx ("=d", high half)
+  __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2));
+  res = ((uint64_t)ts1 | (uint64_t)ts2 << 32);  // widen to 64 bits before shifting, then merge the halves
+  return (jlong)res;
+#endif // AMD64
+}
+
+#endif // OS_CPU_BSD_X86_VM_OS_BSD_X86_INLINE_HPP
--- a/src/os_cpu/bsd_x86/vm/vmStructs_bsd_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/bsd_x86/vm/vmStructs_bsd_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,12 +29,18 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
+#ifdef __APPLE__
+#define OS_THREAD_ID_TYPE thread_t
+#else
+#define OS_THREAD_ID_TYPE pthread_t
+#endif
+
 #define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field, last_entry) \
                                                                                                                                      \
   /******************************/                                                                                                   \
   /* Threads (NOTE: incomplete) */                                                                                                   \
   /******************************/                                                                                                   \
-  nonstatic_field(OSThread,                      _thread_id,                                      pthread_t)                             \
+  nonstatic_field(OSThread,                      _thread_id,                                      OS_THREAD_ID_TYPE)                 \
   nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)                         \
   /* This must be the last entry, and must be present */                                                                             \
   last_entry()
@@ -46,7 +52,7 @@
   /* Posix Thread IDs   */                                                \
   /**********************/                                                \
                                                                           \
-  declare_integer_type(pid_t)                                             \
+  declare_unsigned_integer_type(thread_t)                                 \
   declare_unsigned_integer_type(pthread_t)                                \
                                                                           \
   /* This must be the last entry, and must be present */                  \
--- a/src/os_cpu/linux_x86/vm/linux_x86_32.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/linux_x86/vm/linux_x86_32.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -24,137 +24,3 @@
 
 // X86 Linux Architecture Description File
 
-//----------OS-DEPENDENT ENCODING BLOCK-----------------------------------------------------
-// This block specifies the encoding classes used by the compiler to output
-// byte streams.  Encoding classes generate functions which are called by
-// Machine Instruction Nodes in order to generate the bit encoding of the
-// instruction.  Operands specify their base encoding interface with the
-// interface keyword.  There are currently supported four interfaces,
-// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
-// operand to generate a function which returns its register number when
-// queried.   CONST_INTER causes an operand to generate a function which
-// returns the value of the constant when queried.  MEMORY_INTER causes an
-// operand to generate four functions which return the Base Register, the
-// Index Register, the Scale Value, and the Offset Value of the operand when
-// queried.  COND_INTER causes an operand to generate six functions which
-// return the encoding code (ie - encoding bits for the instruction)
-// associated with each basic boolean condition for a conditional instruction.
-// Instructions specify two basic values for encoding.  They use the
-// ins_encode keyword to specify their encoding class (which must be one of
-// the class names specified in the encoding block), and they use the
-// opcode keyword to specify, in order, their primary, secondary, and
-// tertiary opcode.  Only the opcode sections which a particular instruction
-// needs for encoding need to be specified.
-encode %{
-  // Build emit functions for each basic byte or larger field in the intel
-  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
-  // code in the enc_class source block.  Emit functions will live in the
-  // main source block for now.  In future, we can generalize this by
-  // adding a syntax that specifies the sizes of fields in an order,
-  // so that the adlc can build the emit functions automagically
-
-  enc_class linux_tlsencode (eRegP dst) %{
-    Register dstReg = as_Register($dst$$reg);
-    MacroAssembler* masm = new MacroAssembler(&cbuf);
-      masm->get_thread(dstReg);
-  %}
-
-  enc_class linux_breakpoint  %{
-    MacroAssembler* masm = new MacroAssembler(&cbuf);
-    masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-  %}
-
-  enc_class call_epilog %{
-    if( VerifyStackAtCalls ) {
-      // Check that stack depth is unchanged: find majik cookie on stack
-      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP,-3*VMRegImpl::slots_per_word));
-      if(framesize >= 128) {
-        emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood
-        emit_d8(cbuf,0xBC);
-        emit_d8(cbuf,0x24);
-        emit_d32(cbuf,framesize); // Find majik cookie from ESP
-        emit_d32(cbuf, 0xbadb100d);
-      }
-      else {
-        emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood
-        emit_d8(cbuf,0x7C);
-        emit_d8(cbuf,0x24);
-        emit_d8(cbuf,framesize); // Find majik cookie from ESP
-        emit_d32(cbuf, 0xbadb100d);
-      }
-      // jmp EQ around INT3
-      // QQQ TODO
-      const int jump_around = 5; // size of call to breakpoint, 1 for CC
-      emit_opcode(cbuf,0x74);
-      emit_d8(cbuf, jump_around);
-      // QQQ temporary
-      emit_break(cbuf);
-      // Die if stack mismatch
-      // emit_opcode(cbuf,0xCC);
-    }
-  %}
-
-%}
-
-// INSTRUCTIONS -- Platform dependent
-
-//----------OS and Locking Instructions----------------------------------------
-
-// This name is KNOWN by the ADLC and cannot be changed.
-// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
-// for this guy.
-instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
-  match(Set dst (ThreadLocal));
-  effect(DEF dst, KILL cr);
-
-  format %{ "MOV    $dst, Thread::current()" %}
-  ins_encode( linux_tlsencode(dst) );
-  ins_pipe( ialu_reg_fat );
-%}
-
-instruct TLS(eRegP dst) %{
-  match(Set dst (ThreadLocal));
-
-  expand %{
-    tlsLoadP(dst);
-  %}
-%}
-
-// Die now
-instruct ShouldNotReachHere( )
-%{
-  match(Halt);
-
-  // Use the following format syntax
-  format %{ "INT3   ; ShouldNotReachHere" %}
-  // QQQ TODO for now call breakpoint
-  // opcode(0xCC);
-  // ins_encode(Opc);
-  ins_encode(linux_breakpoint);
-  ins_pipe( pipe_slow );
-%}
-
-
-
-// Platform dependent source
-
-source %{
-
-// emit an interrupt that is caught by the debugger
-void emit_break(CodeBuffer &cbuf) {
-
-  // Debugger doesn't really catch this but best we can do so far QQQ
-  MacroAssembler* masm = new MacroAssembler(&cbuf);
-  masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-}
-
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  emit_break(cbuf);
-}
-
-
-uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
-  return MachNode::size(ra_);
-}
-
-%}
--- a/src/os_cpu/linux_x86/vm/linux_x86_64.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/linux_x86/vm/linux_x86_64.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -55,8 +55,7 @@
   // adding a syntax that specifies the sizes of fields in an order,
   // so that the adlc can build the emit functions automagically
 
-  enc_class Java_To_Runtime(method meth)
-  %{
+  enc_class Java_To_Runtime(method meth) %{
     // No relocation needed
 
     // movq r10, <meth>
@@ -70,105 +69,15 @@
     emit_opcode(cbuf, 0xD0 | (R10_enc - 8));
   %}
 
-  enc_class linux_breakpoint
-  %{
-    MacroAssembler* masm = new MacroAssembler(&cbuf);
-    masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-  %}
-
-  enc_class call_epilog
-  %{
-    if (VerifyStackAtCalls) {
-      // Check that stack depth is unchanged: find majik cookie on stack
-      int framesize =
-        ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
-      if (framesize) {
-        if (framesize < 0x80) {
-          emit_opcode(cbuf, Assembler::REX_W);
-          emit_opcode(cbuf, 0x81); // cmpq [rsp+0],0xbadb1ood
-          emit_d8(cbuf, 0x7C);
-          emit_d8(cbuf, 0x24);
-          emit_d8(cbuf, framesize); // Find majik cookie from ESP
-          emit_d32(cbuf, 0xbadb100d);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_W);
-          emit_opcode(cbuf, 0x81); // cmpq [rsp+0],0xbadb1ood
-          emit_d8(cbuf, 0xBC);
-          emit_d8(cbuf, 0x24);
-          emit_d32(cbuf, framesize); // Find majik cookie from ESP
-          emit_d32(cbuf, 0xbadb100d);
-        }
-      }
-      // jmp EQ around INT3
-      // QQQ TODO
-      const int jump_around = 5; // size of call to breakpoint, 1 for CC
-      emit_opcode(cbuf, 0x74);
-      emit_d8(cbuf, jump_around);
-      // QQQ temporary
-      emit_break(cbuf);
-      // Die if stack mismatch
-      // emit_opcode(cbuf,0xCC);
-    }
-  %}
-
-%}
-
-// INSTRUCTIONS -- Platform dependent
-
-//----------OS and Locking Instructions----------------------------------------
-
-// This name is KNOWN by the ADLC and cannot be changed.
-// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
-// for this guy.
-instruct tlsLoadP(r15_RegP dst)
-%{
-  match(Set dst (ThreadLocal));
-  effect(DEF dst);
-
-  size(0);
-  format %{ "# TLS is in R15" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe(ialu_reg_reg);
-%}
-
-// Die now
-instruct ShouldNotReachHere()
-%{
-  match(Halt);
-
-  // Use the following format syntax
-  format %{ "int3\t# ShouldNotReachHere" %}
-  // QQQ TODO for now call breakpoint
-  // opcode(0xCC);
-  // ins_encode(Opc);
-  ins_encode(linux_breakpoint);
-  ins_pipe(pipe_slow);
 %}
 
 
 // Platform dependent source
 
-source
-%{
+source %{
 
 int MachCallRuntimeNode::ret_addr_offset() {
   return 13; // movq r10,#addr; callq (r10)
 }
 
-// emit an interrupt that is caught by the debugger
-void emit_break(CodeBuffer& cbuf) {
-  // Debugger doesn't really catch this but best we can do so far QQQ
-  MacroAssembler* masm = new MacroAssembler(&cbuf);
-  masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-}
-
-void MachBreakpointNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
-  emit_break(cbuf);
-}
-
-uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
-  // distance could be far and requires load and call through register
-  return MachNode::size(ra_);
-}
-
 %}
--- a/src/os_cpu/linux_x86/vm/os_linux_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/linux_x86/vm/os_linux_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,8 @@
   static void setup_fpu();
   static bool supports_sse();
 
+  static jlong rdtsc();
+
   static bool is_allocatable(size_t bytes);
 
   // Used to register dynamic code cache area with the OS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_x86/vm/os_linux_x86.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
+#define OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+// Reads the CPU time-stamp counter via RDTSC. See http://www.technovelty.org/code/c/reading-rdtsc.html for details
+inline jlong os::rdtsc() {
+#ifndef AMD64
+  // Non-AMD64 build: the 64 bit result is in edx:eax; the "=A" constraint binds that pair to res
+  uint64_t res;
+  __asm__ __volatile__ ("rdtsc" : "=A" (res));
+  return (jlong)res;
+#else
+  uint64_t res;
+  uint32_t ts1, ts2;  // ts1 receives eax ("=a", low half); ts2 receives edx ("=d", high half)
+  __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2));
+  res = ((uint64_t)ts1 | (uint64_t)ts2 << 32);  // widen to 64 bits before shifting, then merge the halves
+  return (jlong)res;
+#endif // AMD64
+}
+
+#endif // OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
--- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -46,6 +46,8 @@
 
   static bool supports_sse();
 
+  static jlong rdtsc();
+
   static bool is_allocatable(size_t bytes);
 
   // Used to register dynamic code cache area with the OS
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_SOLARIS_X86_VM_OS_SOLARIS_X86_INLINE_HPP
+#define OS_CPU_SOLARIS_X86_VM_OS_SOLARIS_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+inline jlong os::rdtsc() { return _raw_rdtsc(); }  // _raw_rdtsc is the inline template added to solaris_x86_32.il / solaris_x86_64.il
+
+#endif // OS_CPU_SOLARIS_X86_VM_OS_SOLARIS_X86_INLINE_HPP
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -24,144 +24,3 @@
 
 // X86 Solaris Architecture Description File
 
-//----------OS-DEPENDENT ENCODING BLOCK-----------------------------------------------------
-// This block specifies the encoding classes used by the compiler to output
-// byte streams.  Encoding classes generate functions which are called by
-// Machine Instruction Nodes in order to generate the bit encoding of the
-// instruction.  Operands specify their base encoding interface with the
-// interface keyword.  There are currently supported four interfaces,
-// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
-// operand to generate a function which returns its register number when
-// queried.   CONST_INTER causes an operand to generate a function which
-// returns the value of the constant when queried.  MEMORY_INTER causes an
-// operand to generate four functions which return the Base Register, the
-// Index Register, the Scale Value, and the Offset Value of the operand when
-// queried.  COND_INTER causes an operand to generate six functions which
-// return the encoding code (ie - encoding bits for the instruction)
-// associated with each basic boolean condition for a conditional instruction.
-// Instructions specify two basic values for encoding.  They use the
-// ins_encode keyword to specify their encoding class (which must be one of
-// the class names specified in the encoding block), and they use the
-// opcode keyword to specify, in order, their primary, secondary, and
-// tertiary opcode.  Only the opcode sections which a particular instruction
-// needs for encoding need to be specified.
-encode %{
-  // Build emit functions for each basic byte or larger field in the intel
-  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
-  // code in the enc_class source block.  Emit functions will live in the
-  // main source block for now.  In future, we can generalize this by
-  // adding a syntax that specifies the sizes of fields in an order,
-  // so that the adlc can build the emit functions automagically
-
-  enc_class solaris_tlsencode (eRegP dst) %{
-    Register dstReg = as_Register($dst$$reg);
-    MacroAssembler* masm = new MacroAssembler(&cbuf);
-    masm->get_thread(dstReg);
-  %}
-
-  enc_class solaris_breakpoint  %{
-    MacroAssembler* masm = new MacroAssembler(&cbuf);
-    // Really need to fix this
-    masm->push(rax);
-    masm->push(rcx);
-    masm->push(rdx);
-    masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-    masm->pop(rdx);
-    masm->pop(rcx);
-    masm->pop(rax);
-  %}
-
-  enc_class call_epilog %{
-    if( VerifyStackAtCalls ) {
-      // Check that stack depth is unchanged: find majik cookie on stack
-      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP,-3*VMRegImpl::slots_per_word));
-      if(framesize >= 128) {
-        emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood
-        emit_d8(cbuf,0xBC);
-        emit_d8(cbuf,0x24);
-        emit_d32(cbuf,framesize); // Find majik cookie from ESP
-        emit_d32(cbuf, 0xbadb100d);
-      }
-      else {
-        emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood
-        emit_d8(cbuf,0x7C);
-        emit_d8(cbuf,0x24);
-        emit_d8(cbuf,framesize); // Find majik cookie from ESP
-        emit_d32(cbuf, 0xbadb100d);
-      }
-      // jmp EQ around INT3
-      // QQQ TODO
-      const int jump_around = 11; // size of call to breakpoint (and register preserve), 1 for CC
-      emit_opcode(cbuf,0x74);
-      emit_d8(cbuf, jump_around);
-      // QQQ temporary
-      emit_break(cbuf);
-      // Die if stack mismatch
-      // emit_opcode(cbuf,0xCC);
-    }
-  %}
-
-%}
-
-// INSTRUCTIONS -- Platform dependent
-
-//----------OS and Locking Instructions----------------------------------------
-
-// This name is KNOWN by the ADLC and cannot be changed.
-// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
-// for this guy.
-instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
-  match(Set dst (ThreadLocal));
-  effect(DEF dst, KILL cr);
-
-  format %{ "MOV    $dst, Thread::current()" %}
-  ins_encode( solaris_tlsencode(dst) );
-  ins_pipe( ialu_reg_fat );
-%}
-
-instruct TLS(eRegP dst) %{
-  match(Set dst (ThreadLocal));
-
-  expand %{
-    tlsLoadP(dst);
-  %}
-%}
-
-// Die now
-instruct ShouldNotReachHere( )
-%{
-  match(Halt);
-
-  // Use the following format syntax
-  format %{ "INT3   ; ShouldNotReachHere" %}
-  // QQQ TODO for now call breakpoint
-  // opcode(0xCC);
-  // ins_encode(Opc);
-  ins_encode(solaris_breakpoint);
-  ins_pipe( pipe_slow );
-%}
-
-
-
-// Platform dependent source
-
-source %{
-
-// emit an interrupt that is caught by the debugger
-void emit_break(CodeBuffer &cbuf) {
-
-  // Debugger doesn't really catch this but best we can do so far QQQ
-  MacroAssembler* masm = new MacroAssembler(&cbuf);
-  masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-}
-
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  emit_break(cbuf);
-}
-
-
-uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
-  return MachNode::size(ra_);
-}
-
-%}
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.il	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.il	Mon Feb 27 15:06:36 2012 -0800
@@ -43,6 +43,11 @@
       movl     %ebp, %eax 
       .end
 
+  // Support for jlong os::rdtsc(): rdtsc leaves the 64-bit counter in edx:eax
+      .inline _raw_rdtsc,0
+      rdtsc
+      .end
+
   // Support for jint Atomic::add(jint inc, volatile jint* dest)
   // An additional bool (os::is_MP()) is passed as the last argument.
       .inline _Atomic_add,3
@@ -113,7 +118,6 @@
       fistpll   (%eax)
       .end
 
-
   // Support for OrderAccess::acquire()
       .inline _OrderAccess_acquire,0
       movl     0(%esp), %eax
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_64.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_64.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2004, 2006, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -55,8 +55,7 @@
   // adding a syntax that specifies the sizes of fields in an order,
   // so that the adlc can build the emit functions automagically
 
-  enc_class Java_To_Runtime(method meth)
-  %{
+  enc_class Java_To_Runtime(method meth) %{
     // No relocation needed
 
     // movq r10, <meth>
@@ -70,118 +69,24 @@
     emit_opcode(cbuf, 0xD0 | (R10_enc - 8));
   %}
 
-  enc_class solaris_breakpoint
-  %{
-    MacroAssembler* masm = new MacroAssembler(&cbuf);
-    masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-  %}
-
-  enc_class call_epilog
-  %{
-    if (VerifyStackAtCalls) {
-      // Check that stack depth is unchanged: find majik cookie on stack
-      int framesize =
-        ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
-      if (framesize) {
-        if (framesize < 0x80) {
-          emit_opcode(cbuf, Assembler::REX_W);
-          emit_opcode(cbuf, 0x81); // cmpq [rsp+0],0xbadb1ood
-          emit_d8(cbuf, 0x7C);
-          emit_d8(cbuf, 0x24);
-          emit_d8(cbuf, framesize); // Find majik cookie from ESP
-          emit_d32(cbuf, 0xbadb100d);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_W);
-          emit_opcode(cbuf, 0x81); // cmpq [rsp+0],0xbadb1ood
-          emit_d8(cbuf, 0xBC);
-          emit_d8(cbuf, 0x24);
-          emit_d32(cbuf, framesize); // Find majik cookie from ESP
-          emit_d32(cbuf, 0xbadb100d);
-        }
-      }
-      // jmp EQ around INT3
-      // QQQ TODO
-      const int jump_around = 5; // size of call to breakpoint, 1 for CC
-      emit_opcode(cbuf, 0x74);
-      emit_d8(cbuf, jump_around);
-      // QQQ temporary
-      emit_break(cbuf);
-      // Die if stack mismatch
-      // emit_opcode(cbuf,0xCC);
+  enc_class post_call_verify_mxcsr %{
+    MacroAssembler _masm(&cbuf);  // named _masm presumably because the "__" shorthand expands to "_masm." -- confirm
+    if (RestoreMXCSROnJNICalls) {  // flag set: emit an ldmxcsr that reloads MXCSR from mxcsr_std()
+      __ ldmxcsr(ExternalAddress(StubRoutines::amd64::mxcsr_std()));
+    }
+    else if (CheckJNICalls) {  // otherwise, when JNI checking is on: emit a call to the verify_mxcsr stub
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::amd64::verify_mxcsr_entry())));
     }
   %}
-
-  enc_class post_call_verify_mxcsr %{
-    MacroAssembler masm(&cbuf);
-    if (RestoreMXCSROnJNICalls) {
-      masm.ldmxcsr(ExternalAddress(StubRoutines::amd64::mxcsr_std()));
-    }
-    else if (CheckJNICalls) {
-      masm.call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::amd64::verify_mxcsr_entry())));
-    }
-  %}
-%}
-
-// INSTRUCTIONS -- Platform dependent
-
-//----------OS and Locking Instructions----------------------------------------
-
-// This name is KNOWN by the ADLC and cannot be changed.
-// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
-// for this guy.
-instruct tlsLoadP(r15_RegP dst)
-%{
-  match(Set dst (ThreadLocal));
-  effect(DEF dst);
-
-  size(0);
-  format %{ "# TLS is in R15" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe(ialu_reg_reg);
-%}
-
-// Die now
-instruct ShouldNotReachHere()
-%{
-  match(Halt);
-
-  // Use the following format syntax
-  format %{ "int3\t# ShouldNotReachHere" %}
-  // QQQ TODO for now call breakpoint
-  // opcode(0xCC);
-  // ins_encode(Opc);
-  ins_encode(solaris_breakpoint);
-  ins_pipe(pipe_slow);
 %}
 
 
 // Platform dependent source
 
-source
-%{
+source %{
 
-int MachCallRuntimeNode::ret_addr_offset()
-{
+int MachCallRuntimeNode::ret_addr_offset() {
   return 13; // movq r10,#addr; callq (r10)
 }
 
-// emit an interrupt that is caught by the debugger
-void emit_break(CodeBuffer& cbuf)
-{
-  // Debugger doesn't really catch this but best we can do so far QQQ
-  MacroAssembler* masm = new MacroAssembler(&cbuf);
-  masm->call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
-}
-
-void MachBreakpointNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
-{
-  emit_break(cbuf);
-}
-
-uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const
-{
-  // distance could be far and requires load and call through register
-  return MachNode::size(ra_);
-}
-
 %}
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_64.il	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_64.il	Mon Feb 27 15:06:36 2012 -0800
@@ -30,12 +30,19 @@
       movq     %fs:0, %rax 
       .end
 
-  // Get the frame pointer from current frame.
+  // Get current fp
       .inline _get_current_fp,0
       .volatile
       movq     %rbp, %rax 
       .end
 
+  // Support for jlong os::rdtsc()
+      .inline _raw_rdtsc,0
+      rdtsc                    // counter halves arrive in edx (high) and eax (low)
+      salq     $32, %rdx       // shift the high half up into bits 63..32 of rdx
+      orq      %rdx, %rax      // merge both halves into rax
+      .end
+
   // Support for jint Atomic::add(jint add_value, volatile jint* dest)
       .inline _Atomic_add,2
       movl     %edi, %eax      // save add_value for return
--- a/src/os_cpu/windows_x86/vm/os_windows_x86.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/windows_x86/vm/os_windows_x86.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -58,6 +58,8 @@
   static void setup_fpu();
   static bool supports_sse() { return true; }
 
+  static jlong rdtsc();
+
   static bool      register_code_area(char *low, char *high);
 
 #endif // OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/windows_x86/vm/os_windows_x86.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_INLINE_HPP
+#define OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+inline jlong os::rdtsc() {
+  // 32-bit build: the 64-bit result is delivered in edx:eax
+  // 64-bit build: the 64-bit value is delivered in rax
+  uint64_t res;
+  res = (uint64_t)__rdtsc();  // NOTE(review): presumably the MSVC <intrin.h> intrinsic -- confirm it is declared where this header is included
+  return (jlong)res;
+}
+
+#endif // OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_INLINE_HPP
--- a/src/os_cpu/windows_x86/vm/windows_x86_32.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/windows_x86/vm/windows_x86_32.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -24,134 +24,3 @@
 
 // X86 Win32 Architecture Description File
 
-//----------OS-DEPENDENT ENCODING BLOCK-----------------------------------------------------
-// This block specifies the encoding classes used by the compiler to output
-// byte streams.  Encoding classes generate functions which are called by
-// Machine Instruction Nodes in order to generate the bit encoding of the
-// instruction.  Operands specify their base encoding interface with the
-// interface keyword.  There are currently supported four interfaces,
-// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
-// operand to generate a function which returns its register number when
-// queried.   CONST_INTER causes an operand to generate a function which
-// returns the value of the constant when queried.  MEMORY_INTER causes an
-// operand to generate four functions which return the Base Register, the
-// Index Register, the Scale Value, and the Offset Value of the operand when
-// queried.  COND_INTER causes an operand to generate six functions which
-// return the encoding code (ie - encoding bits for the instruction)
-// associated with each basic boolean condition for a conditional instruction.
-// Instructions specify two basic values for encoding.  They use the
-// ins_encode keyword to specify their encoding class (which must be one of
-// the class names specified in the encoding block), and they use the
-// opcode keyword to specify, in order, their primary, secondary, and
-// tertiary opcode.  Only the opcode sections which a particular instruction
-// needs for encoding need to be specified.
-encode %{
-  // Build emit functions for each basic byte or larger field in the intel
-  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
-  // code in the enc_class source block.  Emit functions will live in the
-  // main source block for now.  In future, we can generalize this by
-  // adding a syntax that specifies the sizes of fields in an order,
-  // so that the adlc can build the emit functions automagically
-
-  enc_class tlsencode (eRegP dst, eRegP src) %{
-    emit_rm(cbuf, 0x2, $dst$$reg, $src$$reg);
-    emit_d32(cbuf, ThreadLocalStorage::get_thread_ptr_offset() );
-  %}
-
-  enc_class call_epilog %{
-    if( VerifyStackAtCalls ) {
-      // Check that stack depth is unchanged: find majik cookie on stack
-      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP,-3*VMRegImpl::slots_per_word));
-      if(framesize >= 128) {
-        emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood
-        emit_d8(cbuf,0xBC);
-        emit_d8(cbuf,0x24);
-        emit_d32(cbuf,framesize); // Find majik cookie from ESP
-        emit_d32(cbuf, 0xbadb100d);
-      }
-      else {
-        emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood
-        emit_d8(cbuf,0x7C);
-        emit_d8(cbuf,0x24);
-        emit_d8(cbuf,framesize); // Find majik cookie from ESP
-        emit_d32(cbuf, 0xbadb100d);
-      }
-      // jmp EQ around INT3
-      emit_opcode(cbuf,0x74);
-      emit_d8(cbuf,1);
-      // Die if stack mismatch
-      emit_opcode(cbuf,0xCC);
-    }
-  %}
-
-%}
-
-// INSTRUCTIONS -- Platform dependent
-
-
-//----------OS and Locking Instructions----------------------------------------
-
-// The prefix of this name is KNOWN by the ADLC and cannot be changed.
-instruct tlsLoadP_prefixLoadP(eRegP t1) %{
-  effect(DEF t1);
-
-  format %{ "MOV    $t1,FS:[0x00] "%}
-  opcode(0x8B, 0x64);
-  ins_encode(OpcS, OpcP, conmemref(t1));
-  ins_pipe( ialu_reg_fat );
-%}
-
-// This name is KNOWN by the ADLC and cannot be changed.
-// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
-// for this guy.
-// %%% Should do this with a clause like:  bottom_type(TypeRawPtr::BOTTOM);
-instruct tlsLoadP(eRegP dst, eRegP t1) %{
-  effect(DEF dst, USE t1);
-
-  format %{ "MOV    $dst,[$t1 + TLS::thread_ptr_offset()]" %}
-  opcode(0x8B);
-  ins_encode(OpcP, tlsencode(dst, t1));
-  ins_pipe( ialu_reg_reg_fat );
-%}
-
-instruct TLS(eRegP dst) %{
-  match(Set dst (ThreadLocal));
-  expand %{
-    eRegP t1;
-    tlsLoadP_prefixLoadP(t1);
-    tlsLoadP(dst, t1);
-  %}
-%}
-
-// Die now
-instruct ShouldNotReachHere( )
-%{
-  match(Halt);
-  // Use the following format syntax
-  format %{ "INT3   ; ShouldNotReachHere" %}
-  opcode(0xCC);
-  ins_encode(OpcP);
-  ins_pipe( pipe_slow );
-%}
-
-//
-// Platform dependent source
-//
-source %{
-
-// emit an interrupt that is caught by the debugger
-void emit_break(CodeBuffer &cbuf) {
-  cbuf.insts()->emit_int8((unsigned char) 0xcc);
-}
-
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  emit_break(cbuf);
-}
-
-
-uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
-  return 1;
-}
-
-
-%}
--- a/src/os_cpu/windows_x86/vm/windows_x86_64.ad	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/os_cpu/windows_x86/vm/windows_x86_64.ad	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -67,69 +67,6 @@
     emit_opcode(cbuf, 0xD0 | (R10_enc - 8));
   %}
 
-  enc_class call_epilog %{
-    if (VerifyStackAtCalls) {
-      // Check that stack depth is unchanged: find majik cookie on stack
-      int framesize =
-        ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
-      if (framesize) {
-        if (framesize < 0x80) {
-          emit_opcode(cbuf, Assembler::REX_W);
-          emit_opcode(cbuf, 0x81); // cmpq [rsp+0],0xbadb1ood
-          emit_d8(cbuf, 0x7C);
-          emit_d8(cbuf, 0x24);
-          emit_d8(cbuf, framesize); // Find majik cookie from ESP
-          emit_d32(cbuf, 0xbadb100d);
-        } else {
-          emit_opcode(cbuf, Assembler::REX_W);
-          emit_opcode(cbuf, 0x81); // cmpq [rsp+0],0xbadb1ood
-          emit_d8(cbuf, 0xBC);
-          emit_d8(cbuf, 0x24);
-          emit_d32(cbuf, framesize); // Find majik cookie from ESP
-          emit_d32(cbuf, 0xbadb100d);
-        }
-      }
-      // jmp EQ around INT3
-      // QQQ TODO
-      const int jump_around = 5; // size of call to breakpoint, 1 for CC
-      emit_opcode(cbuf, 0x74);
-      emit_d8(cbuf, jump_around);
-      // QQQ temporary
-      emit_break(cbuf);
-      // Die if stack mismatch
-      // emit_opcode(cbuf,0xCC);
-    }
-  %}
-%}
-
-// INSTRUCTIONS -- Platform dependent
-
-
-//----------OS and Locking Instructions----------------------------------------
-
-// This name is KNOWN by the ADLC and cannot be changed.
-// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
-// for this guy.
-instruct tlsLoadP(r15_RegP dst)
-%{
-  match(Set dst (ThreadLocal));
-  effect(DEF dst);
-
-  size(0);
-  format %{ "# TLS is in R15" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe(ialu_reg_reg);
-%}
-
-// Die now
-instruct ShouldNotReachHere( )
-%{
-  match(Halt);
-  // Use the following format syntax
-  format %{ "INT3   ; ShouldNotReachHere" %}
-  opcode(0xCC);
-  ins_encode(OpcP);
-  ins_pipe( pipe_slow );
 %}
 
 //
@@ -142,17 +79,4 @@
   return 13; // movq r10,#addr; callq (r10)
 }
 
-// emit an interrupt that is caught by the debugger
-void emit_break(CodeBuffer &cbuf) {
-  cbuf.insts()->emit_int8((unsigned char) 0xcc);
-}
-
-void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
-  emit_break(cbuf);
-}
-
-uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
-  return 1;
-}
-
 %}
--- a/src/share/tools/ProjectCreator/BuildConfig.java	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/tools/ProjectCreator/BuildConfig.java	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -273,6 +273,7 @@
         sysDefines.add("_WINDOWS");
         sysDefines.add("HOTSPOT_BUILD_USER=\\\""+System.getProperty("user.name")+"\\\"");
         sysDefines.add("HOTSPOT_BUILD_TARGET=\\\""+get("Build")+"\\\"");
+        sysDefines.add("INCLUDE_TRACE");
         sysDefines.add("_JNI_IMPLEMENTATION_");
         if (vars.get("PlatformName").equals("Win32")) {
             sysDefines.add("HOTSPOT_LIB_ARCH=\\\"i386\\\"");
--- a/src/share/tools/hsdis/hsdis.c	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/tools/hsdis/hsdis.c	Mon Feb 27 15:06:36 2012 -0800
@@ -356,7 +356,7 @@
       if (plen > mach_size)  plen = mach_size;
       strncpy(mach_option, p, plen);
       mach_option[plen] = '\0';
-    } else if (plen > 6 && strncmp(p, "hsdis-", 6)) {
+    } else if (plen > 6 && strncmp(p, "hsdis-", 6) == 0) {
       // do not pass these to the next level
     } else {
       /* just copy it; {i386,sparc}-dis.c might like to see it  */
--- a/src/share/vm/adlc/formssel.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/adlc/formssel.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -627,6 +627,7 @@
   if( strcmp(_matrule->_opType,"MemBarAcquire") == 0 ) return true;
   if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true;
   if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true;
+  if( strcmp(_matrule->_opType,"MemBarStoreStore") == 0 ) return true;
 
   return false;
 }
@@ -3978,7 +3979,8 @@
     !strcmp(_opType,"MemBarAcquireLock") ||
     !strcmp(_opType,"MemBarReleaseLock") ||
     !strcmp(_opType,"MemBarVolatile" ) ||
-    !strcmp(_opType,"MemBarCPUOrder" ) ;
+    !strcmp(_opType,"MemBarCPUOrder" ) ||
+    !strcmp(_opType,"MemBarStoreStore" );
 }
 
 bool MatchRule::is_ideal_loadPC() const {
--- a/src/share/vm/asm/assembler.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/asm/assembler.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -61,6 +61,7 @@
   _code_limit  = cs->limit();
   _code_pos    = cs->end();
   _oop_recorder= code->oop_recorder();
+  DEBUG_ONLY( _short_branch_delta = 0; )
   if (_code_begin == NULL)  {
     vm_exit_out_of_memory(0, err_msg("CodeCache: no room for %s",
                                      code->name()));
--- a/src/share/vm/asm/assembler.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/asm/assembler.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -241,6 +241,33 @@
   // Make it return true on platforms which need to verify
   // instruction boundaries for some operations.
   inline static bool pd_check_instruction_mark();
+
+  // Add delta to short branch distance to verify that it still fits into imm8.
+  int _short_branch_delta;
+
+  int  short_branch_delta() const { return _short_branch_delta; }
+  void set_short_branch_delta()   { _short_branch_delta = 32; }  // fixed delta of 32; called by ShortBranchVerifier's constructor
+  void clear_short_branch_delta() { _short_branch_delta = 0; }   // back to 0; called by ShortBranchVerifier's destructor
+
+  class ShortBranchVerifier: public StackObj {  // scoped helper: sets the assembler's short branch delta on construction, clears it on destruction
+   private:
+    AbstractAssembler* _assm;  // assembler whose delta is set and cleared
+
+   public:
+    ShortBranchVerifier(AbstractAssembler* assm) : _assm(assm) {
+      assert(assm->short_branch_delta() == 0, "overlapping instructions");  // delta must not already be set by another verifier
+      _assm->set_short_branch_delta();
+    }
+    ~ShortBranchVerifier() {
+      _assm->clear_short_branch_delta();
+    }
+  };
+  #else
+  // Dummy in product: same constructor signature, empty body.
+  class ShortBranchVerifier: public StackObj {
+   public:
+    ShortBranchVerifier(AbstractAssembler* assm) {}
+  };
   #endif
 
   // Label functions
--- a/src/share/vm/c1/c1_Canonicalizer.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_Canonicalizer.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -594,6 +594,13 @@
   return false;
 }
 
+static bool is_safepoint(BlockEnd* x, BlockBegin* sux) {
+  // An Instruction with multiple successors, x, is replaced by a Goto
+  // to a single successor, sux. A safepoint check is needed only if the
+  // instruction being replaced was a safepoint and the single remaining
+  // successor is a back branch (sux's bci is below x's state_before bci).
+  return x->is_safepoint() && (sux->bci() < x->state_before()->bci());
+}
 
 void Canonicalizer::do_If(If* x) {
   // move const to right
@@ -614,7 +621,7 @@
     case If::geq: sux = x->sux_for(true);  break;
     }
     // If is a safepoint then the debug information should come from the state_before of the If.
-    set_canonical(new Goto(sux, x->state_before(), x->is_safepoint()));
+    set_canonical(new Goto(sux, x->state_before(), is_safepoint(x, sux)));
     return;
   }
 
@@ -626,7 +633,7 @@
                                                        x->sux_for(false));
       if (sux != NULL) {
         // If is a safepoint then the debug information should come from the state_before of the If.
-        set_canonical(new Goto(sux, x->state_before(), x->is_safepoint()));
+        set_canonical(new Goto(sux, x->state_before(), is_safepoint(x, sux)));
       }
     }
   } else if (rt->as_IntConstant() != NULL) {
@@ -694,10 +701,12 @@
     }
   } else if (rt == objectNull && (l->as_NewInstance() || l->as_NewArray())) {
     if (x->cond() == Instruction::eql) {
-      set_canonical(new Goto(x->fsux(), x->state_before(), x->is_safepoint()));
+      BlockBegin* sux = x->fsux();
+      set_canonical(new Goto(sux, x->state_before(), is_safepoint(x, sux)));
     } else {
       assert(x->cond() == Instruction::neq, "only other valid case");
-      set_canonical(new Goto(x->tsux(), x->state_before(), x->is_safepoint()));
+      BlockBegin* sux = x->tsux();
+      set_canonical(new Goto(sux, x->state_before(), is_safepoint(x, sux)));
     }
   }
 }
@@ -710,7 +719,7 @@
     if (v >= x->lo_key() && v <= x->hi_key()) {
       sux = x->sux_at(v - x->lo_key());
     }
-    set_canonical(new Goto(sux, x->state_before(), x->is_safepoint()));
+    set_canonical(new Goto(sux, x->state_before(), is_safepoint(x, sux)));
   } else if (x->number_of_sux() == 1) {
     // NOTE: Code permanently disabled for now since the switch statement's
     //       tag expression may produce side-effects in which case it must
@@ -741,7 +750,7 @@
         sux = x->sux_at(i);
       }
     }
-    set_canonical(new Goto(sux, x->state_before(), x->is_safepoint()));
+    set_canonical(new Goto(sux, x->state_before(), is_safepoint(x, sux)));
   } else if (x->number_of_sux() == 1) {
     // NOTE: Code permanently disabled for now since the switch statement's
     //       tag expression may produce side-effects in which case it must
@@ -899,3 +908,4 @@
 void Canonicalizer::do_ProfileCall(ProfileCall* x) {}
 void Canonicalizer::do_ProfileInvoke(ProfileInvoke* x) {}
 void Canonicalizer::do_RuntimeCall(RuntimeCall* x) {}
+void Canonicalizer::do_MemBar(MemBar* x) {}
--- a/src/share/vm/c1/c1_Canonicalizer.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_Canonicalizer.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -104,6 +104,7 @@
   virtual void do_ProfileCall    (ProfileCall*     x);
   virtual void do_ProfileInvoke  (ProfileInvoke*   x);
   virtual void do_RuntimeCall    (RuntimeCall*     x);
+  virtual void do_MemBar         (MemBar*          x);
 };
 
 #endif // SHARE_VM_C1_C1_CANONICALIZER_HPP
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1181,6 +1181,11 @@
   bool is_bb = tsux->bci() < stream()->cur_bci() || fsux->bci() < stream()->cur_bci();
   Instruction *i = append(new If(x, cond, false, y, tsux, fsux, is_bb ? state_before : NULL, is_bb));
 
+  assert(i->as_Goto() == NULL ||
+         (i->as_Goto()->sux_at(0) == tsux  && i->as_Goto()->is_safepoint() == tsux->bci() < stream()->cur_bci()) ||
+         (i->as_Goto()->sux_at(0) == fsux  && i->as_Goto()->is_safepoint() == fsux->bci() < stream()->cur_bci()),
+         "safepoint state of Goto returned by canonicalizer incorrect");
+
   if (is_profiling()) {
     If* if_node = i->as_If();
     if (if_node != NULL) {
@@ -1303,7 +1308,16 @@
     // add default successor
     sux->at_put(i, block_at(bci() + sw.default_offset()));
     ValueStack* state_before = has_bb ? copy_state_before() : NULL;
-    append(new TableSwitch(ipop(), sux, sw.low_key(), state_before, has_bb));
+    Instruction* res = append(new TableSwitch(ipop(), sux, sw.low_key(), state_before, has_bb));
+#ifdef ASSERT
+    if (res->as_Goto()) {
+      for (i = 0; i < l; i++) {
+        if (sux->at(i) == res->as_Goto()->sux_at(0)) {
+          assert(res->as_Goto()->is_safepoint() == sw.dest_offset_at(i) < 0, "safepoint state of Goto returned by canonicalizer incorrect");
+        }
+      }
+    }
+#endif
   }
 }
 
@@ -1338,7 +1352,16 @@
     // add default successor
     sux->at_put(i, block_at(bci() + sw.default_offset()));
     ValueStack* state_before = has_bb ? copy_state_before() : NULL;
-    append(new LookupSwitch(ipop(), sux, keys, state_before, has_bb));
+    Instruction* res = append(new LookupSwitch(ipop(), sux, keys, state_before, has_bb));
+#ifdef ASSERT
+    if (res->as_Goto()) {
+      for (i = 0; i < l; i++) {
+        if (sux->at(i) == res->as_Goto()->sux_at(0)) {
+          assert(res->as_Goto()->is_safepoint() == sw.pair_at(i).offset() < 0, "safepoint state of Goto returned by canonicalizer incorrect");
+        }
+      }
+    }
+#endif
   }
 }
 
@@ -1395,6 +1418,12 @@
     call_register_finalizer();
   }
 
+  bool need_mem_bar = false;
+  if (method()->name() == ciSymbol::object_initializer_name() &&
+      scope()->wrote_final()) {
+    need_mem_bar = true;
+  }
+
   // Check to see whether we are inlining. If so, Return
   // instructions become Gotos to the continuation point.
   if (continuation() != NULL) {
@@ -1414,6 +1443,10 @@
       monitorexit(state()->lock_at(0), SynchronizationEntryBCI);
     }
 
+    if (need_mem_bar) {
+      append(new MemBar(lir_membar_storestore));
+    }
+
     // State at end of inlined method is the state of the caller
     // without the method parameters on stack, including the
     // return value, if any, of the inlined method on operand stack.
@@ -1433,7 +1466,6 @@
     // the continuation point.
     append_with_bci(goto_callee, scope_data()->continuation()->bci());
     incr_num_returns();
-
     return;
   }
 
@@ -1449,6 +1481,10 @@
     append_split(new MonitorExit(receiver, state()->unlock()));
   }
 
+  if (need_mem_bar) {
+      append(new MemBar(lir_membar_storestore));
+  }
+
   append(new Return(x));
 }
 
@@ -1481,6 +1517,9 @@
     }
   }
 
+  if (field->is_final() && (code == Bytecodes::_putfield)) {
+    scope()->set_wrote_final();
+  }
 
   const int offset = !needs_patching ? field->offset() : -1;
   switch (code) {
@@ -1592,6 +1631,7 @@
   // this happened while running the JCK invokevirtual tests under doit.  TKR
   ciMethod* cha_monomorphic_target = NULL;
   ciMethod* exact_target = NULL;
+  Value better_receiver = NULL;
   if (UseCHA && DeoptC1 && klass->is_loaded() && target->is_loaded() &&
       !target->is_method_handle_invoke()) {
     Value receiver = NULL;
@@ -1653,6 +1693,18 @@
       ciInstanceKlass* singleton = NULL;
       if (target->holder()->nof_implementors() == 1) {
         singleton = target->holder()->implementor(0);
+
+        assert(holder->is_interface(), "invokeinterface to non interface?");
+        ciInstanceKlass* decl_interface = (ciInstanceKlass*)holder;
+        // the number of implementors for decl_interface is less or
+        // equal to the number of implementors for target->holder() so
+        // if number of implementors of target->holder() == 1 then
+        // number of implementors for decl_interface is 0 or 1. If
+        // it's 0 then no class implements decl_interface and there's
+        // no point in inlining.
+        if (!holder->is_loaded() || decl_interface->nof_implementors() != 1) {
+          singleton = NULL;
+        }
       }
       if (singleton) {
         cha_monomorphic_target = target->find_monomorphic_target(calling_klass, target->holder(), singleton);
@@ -1667,7 +1719,9 @@
           CheckCast* c = new CheckCast(klass, receiver, copy_state_for_exception());
           c->set_incompatible_class_change_check();
           c->set_direct_compare(klass->is_final());
-          append_split(c);
+          // pass the result of the checkcast so that the compiler has
+          // more accurate type info in the inlinee
+          better_receiver = append_split(c);
         }
       }
     }
@@ -1709,7 +1763,7 @@
       }
       if (!success) {
         // static binding => check if callee is ok
-        success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL));
+        success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), better_receiver);
       }
       CHECK_BAILOUT();
 
@@ -3034,7 +3088,7 @@
 }
 
 
-bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known) {
+bool GraphBuilder::try_inline(ciMethod* callee, bool holder_known, Value receiver) {
   // Clear out any existing inline bailout condition
   clear_inline_bailout();
 
@@ -3056,7 +3110,7 @@
   } else if (callee->is_abstract()) {
     INLINE_BAILOUT("abstract")
   } else {
-    return try_inline_full(callee, holder_known);
+    return try_inline_full(callee, holder_known, NULL, receiver);
   }
 }
 
@@ -3405,7 +3459,7 @@
 }
 
 
-bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, BlockBegin* cont_block) {
+bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, BlockBegin* cont_block, Value receiver) {
   assert(!callee->is_native(), "callee must not be native");
   if (CompilationPolicy::policy()->should_not_inline(compilation()->env(), callee)) {
     INLINE_BAILOUT("inlining prohibited by policy");
@@ -3541,6 +3595,9 @@
       Value  arg = caller_state->stack_at_inc(i);
       // NOTE: take base() of arg->type() to avoid problems storing
       // constants
+      if (receiver != NULL && par_no == 0) {
+        arg = receiver;
+      }
       store_local(callee_state, arg, arg->type()->base(), par_no);
     }
   }
@@ -3683,56 +3740,61 @@
       // Get the two MethodHandle inputs from the Phi.
       Value op1 = phi->operand_at(0);
       Value op2 = phi->operand_at(1);
-      ciMethodHandle* mh1 = op1->type()->as_ObjectType()->constant_value()->as_method_handle();
-      ciMethodHandle* mh2 = op2->type()->as_ObjectType()->constant_value()->as_method_handle();
-
-      // Set the callee to have access to the class and signature in
-      // the MethodHandleCompiler.
-      mh1->set_callee(callee);
-      mh1->set_caller(method());
-      mh2->set_callee(callee);
-      mh2->set_caller(method());
-
-      // Get adapters for the MethodHandles.
-      ciMethod* mh1_adapter = mh1->get_method_handle_adapter();
-      ciMethod* mh2_adapter = mh2->get_method_handle_adapter();
-
-      if (mh1_adapter != NULL && mh2_adapter != NULL) {
-        set_inline_cleanup_info();
-
-        // Build the If guard
-        BlockBegin* one = new BlockBegin(next_bci());
-        BlockBegin* two = new BlockBegin(next_bci());
-        BlockBegin* end = new BlockBegin(next_bci());
-        Instruction* iff = append(new If(phi, If::eql, false, op1, one, two, NULL, false));
-        block()->set_end(iff->as_BlockEnd());
-
-        // Connect up the states
-        one->merge(block()->end()->state());
-        two->merge(block()->end()->state());
-
-        // Save the state for the second inlinee
-        ValueStack* state_before = copy_state_before();
-
-        // Parse first adapter
-        _last = _block = one;
-        if (!try_inline_full(mh1_adapter, /*holder_known=*/ true, end)) {
-          restore_inline_cleanup_info();
-          block()->clear_end();  // remove appended iff
-          return false;
+      ObjectType* op1type = op1->type()->as_ObjectType();
+      ObjectType* op2type = op2->type()->as_ObjectType();
+
+      if (op1type->is_constant() && op2type->is_constant()) {
+        ciMethodHandle* mh1 = op1type->constant_value()->as_method_handle();
+        ciMethodHandle* mh2 = op2type->constant_value()->as_method_handle();
+
+        // Set the callee to have access to the class and signature in
+        // the MethodHandleCompiler.
+        mh1->set_callee(callee);
+        mh1->set_caller(method());
+        mh2->set_callee(callee);
+        mh2->set_caller(method());
+
+        // Get adapters for the MethodHandles.
+        ciMethod* mh1_adapter = mh1->get_method_handle_adapter();
+        ciMethod* mh2_adapter = mh2->get_method_handle_adapter();
+
+        if (mh1_adapter != NULL && mh2_adapter != NULL) {
+          set_inline_cleanup_info();
+
+          // Build the If guard
+          BlockBegin* one = new BlockBegin(next_bci());
+          BlockBegin* two = new BlockBegin(next_bci());
+          BlockBegin* end = new BlockBegin(next_bci());
+          Instruction* iff = append(new If(phi, If::eql, false, op1, one, two, NULL, false));
+          block()->set_end(iff->as_BlockEnd());
+
+          // Connect up the states
+          one->merge(block()->end()->state());
+          two->merge(block()->end()->state());
+
+          // Save the state for the second inlinee
+          ValueStack* state_before = copy_state_before();
+
+          // Parse first adapter
+          _last = _block = one;
+          if (!try_inline_full(mh1_adapter, /*holder_known=*/ true, end, NULL)) {
+            restore_inline_cleanup_info();
+            block()->clear_end();  // remove appended iff
+            return false;
+          }
+
+          // Parse second adapter
+          _last = _block = two;
+          _state = state_before;
+          if (!try_inline_full(mh2_adapter, /*holder_known=*/ true, end, NULL)) {
+            restore_inline_cleanup_info();
+            block()->clear_end();  // remove appended iff
+            return false;
+          }
+
+          connect_to_end(end);
+          return true;
         }
-
-        // Parse second adapter
-        _last = _block = two;
-        _state = state_before;
-        if (!try_inline_full(mh2_adapter, /*holder_known=*/ true, end)) {
-          restore_inline_cleanup_info();
-          block()->clear_end();  // remove appended iff
-          return false;
-        }
-
-        connect_to_end(end);
-        return true;
       }
     }
   }
--- a/src/share/vm/c1/c1_GraphBuilder.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_GraphBuilder.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -337,9 +337,9 @@
   void fill_sync_handler(Value lock, BlockBegin* sync_handler, bool default_handler = false);
 
   // inliners
-  bool try_inline(           ciMethod* callee, bool holder_known);
+  bool try_inline(           ciMethod* callee, bool holder_known, Value receiver = NULL);
   bool try_inline_intrinsics(ciMethod* callee);
-  bool try_inline_full(      ciMethod* callee, bool holder_known, BlockBegin* cont_block = NULL);
+  bool try_inline_full(      ciMethod* callee, bool holder_known, BlockBegin* cont_block, Value receiver);
   bool try_inline_jsr(int jsr_dest_bci);
 
   // JSR 292 support
--- a/src/share/vm/c1/c1_IR.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_IR.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -141,6 +141,7 @@
   _xhandlers          = new XHandlers(method);
   _number_of_locks    = 0;
   _monitor_pairing_ok = method->has_balanced_monitors();
+  _wrote_final        = false;
   _start              = NULL;
 
   if (osr_bci == -1) {
--- a/src/share/vm/c1/c1_IR.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_IR.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -149,6 +149,7 @@
   XHandlers*    _xhandlers;                      // the exception handlers
   int           _number_of_locks;                // the number of monitor lock slots needed
   bool          _monitor_pairing_ok;             // the monitor pairing info
+  bool          _wrote_final;                    // has written final field
   BlockBegin*   _start;                          // the start block, successsors are method entries
 
   BitMap        _requires_phi_function;          // bit is set if phi functions at loop headers are necessary for a local variable
@@ -181,6 +182,8 @@
   void          set_min_number_of_locks(int n)   { if (n > _number_of_locks) _number_of_locks = n; }
   bool          monitor_pairing_ok() const       { return _monitor_pairing_ok; }
   BlockBegin*   start() const                    { return _start; }
+  void          set_wrote_final()                { _wrote_final = true; }
+  bool          wrote_final    () const          { return _wrote_final; }
 };
 
 
--- a/src/share/vm/c1/c1_Instruction.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_Instruction.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -107,6 +107,7 @@
 class   ProfileCall;
 class   ProfileInvoke;
 class   RuntimeCall;
+class   MemBar;
 
 // A Value is a reference to the instruction creating the value
 typedef Instruction* Value;
@@ -204,6 +205,7 @@
   virtual void do_ProfileCall    (ProfileCall*     x) = 0;
   virtual void do_ProfileInvoke  (ProfileInvoke*   x) = 0;
   virtual void do_RuntimeCall    (RuntimeCall*     x) = 0;
+  virtual void do_MemBar         (MemBar*          x) = 0;
 };
 
 
@@ -2351,6 +2353,23 @@
   virtual void state_values_do(ValueVisitor*);
 };
 
+LEAF(MemBar, Instruction)  // instruction of voidType that carries a memory barrier LIR_Code
+ private:
+  LIR_Code _code;  // barrier code given at construction, e.g. lir_membar_storestore
+
+ public:
+  MemBar(LIR_Code code)
+    : Instruction(voidType)
+    , _code(code)
+  {
+    pin();  // NOTE(review): presumably keeps the barrier from being removed or moved - confirm against Instruction::pin()
+  }
+
+  LIR_Code code()           { return _code; }
+
+  virtual void input_values_do(ValueVisitor*)   {}  // no input values to visit
+};
+
 class BlockPair: public CompilationResourceObj {
  private:
   BlockBegin* _from;
--- a/src/share/vm/c1/c1_InstructionPrinter.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_InstructionPrinter.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -855,4 +855,20 @@
   output()->put(')');
 }
 
+void InstructionPrinter::do_MemBar(MemBar* x) {  // prints the name of the barrier kind held by x
+  if (os::is_MP()) {  // nothing is printed when os::is_MP() is false
+    LIR_Code code = x->code();
+    switch (code) {
+      case lir_membar_acquire   : output()->print("membar_acquire"); break;
+      case lir_membar_release   : output()->print("membar_release"); break;
+      case lir_membar           : output()->print("membar"); break;
+      case lir_membar_loadload  : output()->print("membar_loadload"); break;
+      case lir_membar_storestore: output()->print("membar_storestore"); break;
+      case lir_membar_loadstore : output()->print("membar_loadstore"); break;
+      case lir_membar_storeload : output()->print("membar_storeload"); break;
+      default                   : ShouldNotReachHere(); break;  // any other LIR_Code is treated as an error
+    }
+  }
+}
+
 #endif // PRODUCT
--- a/src/share/vm/c1/c1_InstructionPrinter.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_InstructionPrinter.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -132,6 +132,7 @@
   virtual void do_ProfileCall    (ProfileCall*     x);
   virtual void do_ProfileInvoke  (ProfileInvoke*   x);
   virtual void do_RuntimeCall    (RuntimeCall*     x);
+  virtual void do_MemBar         (MemBar*          x);
 };
 #endif // PRODUCT
 
--- a/src/share/vm/c1/c1_LIR.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_LIR.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -464,6 +464,10 @@
     case lir_membar:                   // result and info always invalid
     case lir_membar_acquire:           // result and info always invalid
     case lir_membar_release:           // result and info always invalid
+    case lir_membar_loadload:          // result and info always invalid
+    case lir_membar_storestore:        // result and info always invalid
+    case lir_membar_loadstore:         // result and info always invalid
+    case lir_membar_storeload:         // result and info always invalid
     {
       assert(op->as_Op0() != NULL, "must be");
       assert(op->_info == NULL, "info not used by this instruction");
@@ -854,6 +858,9 @@
       if (opTypeCheck->_info_for_exception)       do_info(opTypeCheck->_info_for_exception);
       if (opTypeCheck->_info_for_patch)           do_info(opTypeCheck->_info_for_patch);
       if (opTypeCheck->_object->is_valid())       do_input(opTypeCheck->_object);
+      if (op->code() == lir_store_check && opTypeCheck->_object->is_valid()) {
+        do_temp(opTypeCheck->_object);
+      }
       if (opTypeCheck->_array->is_valid())        do_input(opTypeCheck->_array);
       if (opTypeCheck->_tmp1->is_valid())         do_temp(opTypeCheck->_tmp1);
       if (opTypeCheck->_tmp2->is_valid())         do_temp(opTypeCheck->_tmp2);
@@ -1604,6 +1611,10 @@
      case lir_membar:                s = "membar";        break;
      case lir_membar_acquire:        s = "membar_acquire"; break;
      case lir_membar_release:        s = "membar_release"; break;
+     case lir_membar_loadload:       s = "membar_loadload";   break;
+     case lir_membar_storestore:     s = "membar_storestore"; break;
+     case lir_membar_loadstore:      s = "membar_loadstore";  break;
+     case lir_membar_storeload:      s = "membar_storeload";  break;
      case lir_word_align:            s = "word_align";    break;
      case lir_label:                 s = "label";         break;
      case lir_nop:                   s = "nop";           break;
--- a/src/share/vm/c1/c1_LIR.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_LIR.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -866,6 +866,10 @@
       , lir_membar
       , lir_membar_acquire
       , lir_membar_release
+      , lir_membar_loadload
+      , lir_membar_storestore
+      , lir_membar_loadstore
+      , lir_membar_storeload
       , lir_get_thread
   , end_op0
   , begin_op1
@@ -1354,9 +1358,10 @@
   CodeStub*     _stub;   // if this is a branch to a stub, this is the stub
 
  public:
-  LIR_OpBranch(LIR_Condition cond, Label* lbl)
+  LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl)
     : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL)
     , _cond(cond)
+    , _type(type)
     , _label(lbl)
     , _block(NULL)
     , _ublock(NULL)
@@ -1917,6 +1922,10 @@
   void membar()                                  { append(new LIR_Op0(lir_membar)); }
   void membar_acquire()                          { append(new LIR_Op0(lir_membar_acquire)); }
   void membar_release()                          { append(new LIR_Op0(lir_membar_release)); }
+  void membar_loadload()                         { append(new LIR_Op0(lir_membar_loadload)); }
+  void membar_storestore()                       { append(new LIR_Op0(lir_membar_storestore)); }
+  void membar_loadstore()                        { append(new LIR_Op0(lir_membar_loadstore)); }
+  void membar_storeload()                        { append(new LIR_Op0(lir_membar_storeload)); }
 
   void nop()                                     { append(new LIR_Op0(lir_nop)); }
   void build_frame()                             { append(new LIR_Op0(lir_build_frame)); }
@@ -2053,7 +2062,7 @@
   void jump(CodeStub* stub) {
     append(new LIR_OpBranch(lir_cond_always, T_ILLEGAL, stub));
   }
-  void branch(LIR_Condition cond, Label* lbl)        { append(new LIR_OpBranch(cond, lbl)); }
+  void branch(LIR_Condition cond, BasicType type, Label* lbl)        { append(new LIR_OpBranch(cond, type, lbl)); }
   void branch(LIR_Condition cond, BasicType type, BlockBegin* block) {
     assert(type != T_FLOAT && type != T_DOUBLE, "no fp comparisons");
     append(new LIR_OpBranch(cond, type, block));
--- a/src/share/vm/c1/c1_LIRAssembler.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -665,6 +665,22 @@
       membar_release();
       break;
 
+    case lir_membar_loadload:
+      membar_loadload();
+      break;
+
+    case lir_membar_storestore:
+      membar_storestore();
+      break;
+
+    case lir_membar_loadstore:
+      membar_loadstore();
+      break;
+
+    case lir_membar_storeload:
+      membar_storeload();
+      break;
+
     case lir_get_thread:
       get_thread(op->result_opr());
       break;
--- a/src/share/vm/c1/c1_LIRAssembler.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_LIRAssembler.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -241,6 +241,10 @@
   void membar();
   void membar_acquire();
   void membar_release();
+  void membar_loadload();
+  void membar_storestore();
+  void membar_loadstore();
+  void membar_storeload();
   void get_thread(LIR_Opr result);
 
   void verify_oop_map(CodeEmitInfo* info);
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1256,8 +1256,7 @@
     info = state_for(x);
   }
   __ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), result, info);
-  __ move_wide(new LIR_Address(result, Klass::java_mirror_offset_in_bytes() +
-                               klassOopDesc::klass_part_offset_in_bytes(), T_OBJECT), result);
+  __ move_wide(new LIR_Address(result, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
 }
 
 
@@ -2351,7 +2350,7 @@
     } else {
       LabelObj* L = new LabelObj();
       __ cmp(lir_cond_less, value, low_key);
-      __ branch(lir_cond_less, L->label());
+      __ branch(lir_cond_less, T_INT, L->label());
       __ cmp(lir_cond_lessEqual, value, high_key);
       __ branch(lir_cond_lessEqual, T_INT, dest);
       __ branch_destination(L->label());
@@ -3166,3 +3165,20 @@
   }
   return result;
 }
+
+void LIRGenerator::do_MemBar(MemBar* x) {  // appends the LIR barrier op selected by x->code()
+  if (os::is_MP()) {  // no barrier op is appended when os::is_MP() is false
+    LIR_Code code = x->code();
+    switch(code) {
+      case lir_membar_acquire   : __ membar_acquire(); break;
+      case lir_membar_release   : __ membar_release(); break;
+      case lir_membar           : __ membar(); break;
+      case lir_membar_loadload  : __ membar_loadload(); break;
+      case lir_membar_storestore: __ membar_storestore(); break;
+      case lir_membar_loadstore : __ membar_loadstore(); break;
+      case lir_membar_storeload : __ membar_storeload(); break;
+      default                   : ShouldNotReachHere(); break;  // any other LIR_Code is treated as unreachable
+    }
+  }
+}
+
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -525,6 +525,7 @@
   virtual void do_ProfileCall    (ProfileCall*     x);
   virtual void do_ProfileInvoke  (ProfileInvoke*   x);
   virtual void do_RuntimeCall    (RuntimeCall*     x);
+  virtual void do_MemBar         (MemBar*          x);
 };
 
 
--- a/src/share/vm/c1/c1_LinearScan.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_LinearScan.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -2464,12 +2464,15 @@
 
 
 // frequently used constants
-ConstantOopWriteValue LinearScan::_oop_null_scope_value = ConstantOopWriteValue(NULL);
-ConstantIntValue      LinearScan::_int_m1_scope_value = ConstantIntValue(-1);
-ConstantIntValue      LinearScan::_int_0_scope_value =  ConstantIntValue(0);
-ConstantIntValue      LinearScan::_int_1_scope_value =  ConstantIntValue(1);
-ConstantIntValue      LinearScan::_int_2_scope_value =  ConstantIntValue(2);
-LocationValue         _illegal_value = LocationValue(Location());
+// Allocate them with new so they are never destroyed (otherwise, a
+// forced exit could destroy these objects while they are still in
+// use).
+ConstantOopWriteValue* LinearScan::_oop_null_scope_value = new (ResourceObj::C_HEAP) ConstantOopWriteValue(NULL);
+ConstantIntValue*      LinearScan::_int_m1_scope_value = new (ResourceObj::C_HEAP) ConstantIntValue(-1);
+ConstantIntValue*      LinearScan::_int_0_scope_value =  new (ResourceObj::C_HEAP) ConstantIntValue(0);
+ConstantIntValue*      LinearScan::_int_1_scope_value =  new (ResourceObj::C_HEAP) ConstantIntValue(1);
+ConstantIntValue*      LinearScan::_int_2_scope_value =  new (ResourceObj::C_HEAP) ConstantIntValue(2);
+LocationValue*         _illegal_value = new (ResourceObj::C_HEAP) LocationValue(Location());
 
 void LinearScan::init_compute_debug_info() {
   // cache for frequently used scope values
@@ -2508,7 +2511,7 @@
     case T_OBJECT: {
       jobject value = c->as_jobject();
       if (value == NULL) {
-        scope_values->append(&_oop_null_scope_value);
+        scope_values->append(_oop_null_scope_value);
       } else {
         scope_values->append(new ConstantOopWriteValue(c->as_jobject()));
       }
@@ -2519,10 +2522,10 @@
     case T_FLOAT: {
       int value = c->as_jint_bits();
       switch (value) {
-        case -1: scope_values->append(&_int_m1_scope_value); break;
-        case 0:  scope_values->append(&_int_0_scope_value); break;
-        case 1:  scope_values->append(&_int_1_scope_value); break;
-        case 2:  scope_values->append(&_int_2_scope_value); break;
+        case -1: scope_values->append(_int_m1_scope_value); break;
+        case 0:  scope_values->append(_int_0_scope_value); break;
+        case 1:  scope_values->append(_int_1_scope_value); break;
+        case 2:  scope_values->append(_int_2_scope_value); break;
         default: scope_values->append(new ConstantIntValue(c->as_jint_bits())); break;
       }
       return 1;
@@ -2531,7 +2534,7 @@
     case T_LONG: // fall through
     case T_DOUBLE: {
 #ifdef _LP64
-      scope_values->append(&_int_0_scope_value);
+      scope_values->append(_int_0_scope_value);
       scope_values->append(new ConstantLongValue(c->as_jlong_bits()));
 #else
       if (hi_word_offset_in_bytes > lo_word_offset_in_bytes) {
@@ -2657,7 +2660,7 @@
       }
       // Does this reverse on x86 vs. sparc?
       first =  new LocationValue(loc1);
-      second = &_int_0_scope_value;
+      second = _int_0_scope_value;
 #else
       Location loc1, loc2;
       if (!frame_map()->locations_for_slot(opr->double_stack_ix(), Location::normal, &loc1, &loc2)) {
@@ -2671,7 +2674,7 @@
 #ifdef _LP64
       VMReg rname_first = opr->as_register_lo()->as_VMReg();
       first = new LocationValue(Location::new_reg_loc(Location::lng, rname_first));
-      second = &_int_0_scope_value;
+      second = _int_0_scope_value;
 #else
       VMReg rname_first = opr->as_register_lo()->as_VMReg();
       VMReg rname_second = opr->as_register_hi()->as_VMReg();
@@ -2694,7 +2697,7 @@
       VMReg rname_first  = opr->as_xmm_double_reg()->as_VMReg();
 #  ifdef _LP64
       first = new LocationValue(Location::new_reg_loc(Location::dbl, rname_first));
-      second = &_int_0_scope_value;
+      second = _int_0_scope_value;
 #  else
       first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
       // %%% This is probably a waste but we'll keep things as they were for now
@@ -2741,7 +2744,7 @@
 
 #ifdef _LP64
       first = new LocationValue(Location::new_reg_loc(Location::dbl, rname_first));
-      second = &_int_0_scope_value;
+      second = _int_0_scope_value;
 #else
       first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first));
       // %%% This is probably a waste but we'll keep things as they were for now
@@ -2822,7 +2825,7 @@
     }
   } else {
     // append a dummy value because real value not needed
-    scope_values->append(&_illegal_value);
+    scope_values->append(_illegal_value);
     return 1;
   }
 }
@@ -2865,7 +2868,7 @@
     nof_locals = cur_scope->method()->max_locals();
     locals = new GrowableArray<ScopeValue*>(nof_locals);
     for(int i = 0; i < nof_locals; i++) {
-      locals->append(&_illegal_value);
+      locals->append(_illegal_value);
     }
   }
 
--- a/src/share/vm/c1/c1_LinearScan.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_LinearScan.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -160,11 +160,11 @@
   // TODO: cached scope values for registers could be static
   ScopeValueArray           _scope_value_cache;
 
-  static ConstantOopWriteValue _oop_null_scope_value;
-  static ConstantIntValue    _int_m1_scope_value;
-  static ConstantIntValue    _int_0_scope_value;
-  static ConstantIntValue    _int_1_scope_value;
-  static ConstantIntValue    _int_2_scope_value;
+  static ConstantOopWriteValue* _oop_null_scope_value;
+  static ConstantIntValue*    _int_m1_scope_value;
+  static ConstantIntValue*    _int_0_scope_value;
+  static ConstantIntValue*    _int_1_scope_value;
+  static ConstantIntValue*    _int_2_scope_value;
 
   // accessors
   IR*           ir() const                       { return _ir; }
--- a/src/share/vm/c1/c1_Optimizer.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_Optimizer.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -122,18 +122,32 @@
   if (sux != f_goto->default_sux()) return;
 
   // check if at least one word was pushed on sux_state
+  // inlining depths must match
+  ValueStack* if_state = if_->state();
   ValueStack* sux_state = sux->state();
-  if (sux_state->stack_size() <= if_->state()->stack_size()) return;
+  if (if_state->scope()->level() > sux_state->scope()->level()) {
+    while (sux_state->scope() != if_state->scope()) {
+      if_state = if_state->caller_state();
+      assert(if_state != NULL, "states do not match up");
+    }
+  } else if (if_state->scope()->level() < sux_state->scope()->level()) {
+    while (sux_state->scope() != if_state->scope()) {
+      sux_state = sux_state->caller_state();
+      assert(sux_state != NULL, "states do not match up");
+    }
+  }
+
+  if (sux_state->stack_size() <= if_state->stack_size()) return;
 
   // check if phi function is present at end of successor stack and that
   // only this phi was pushed on the stack
-  Value sux_phi = sux_state->stack_at(if_->state()->stack_size());
+  Value sux_phi = sux_state->stack_at(if_state->stack_size());
   if (sux_phi == NULL || sux_phi->as_Phi() == NULL || sux_phi->as_Phi()->block() != sux) return;
-  if (sux_phi->type()->size() != sux_state->stack_size() - if_->state()->stack_size()) return;
+  if (sux_phi->type()->size() != sux_state->stack_size() - if_state->stack_size()) return;
 
   // get the values that were pushed in the true- and false-branch
-  Value t_value = t_goto->state()->stack_at(if_->state()->stack_size());
-  Value f_value = f_goto->state()->stack_at(if_->state()->stack_size());
+  Value t_value = t_goto->state()->stack_at(if_state->stack_size());
+  Value f_value = f_goto->state()->stack_at(if_state->stack_size());
 
   // backend does not support floats
   assert(t_value->type()->base() == f_value->type()->base(), "incompatible types");
@@ -180,11 +194,7 @@
   Goto* goto_ = new Goto(sux, state_before, if_->is_safepoint() || t_goto->is_safepoint() || f_goto->is_safepoint());
 
   // prepare state for Goto
-  ValueStack* goto_state = if_->state();
-  while (sux_state->scope() != goto_state->scope()) {
-    goto_state = goto_state->caller_state();
-    assert(goto_state != NULL, "states do not match up");
-  }
+  ValueStack* goto_state = if_state;
   goto_state = goto_state->copy(ValueStack::StateAfter, goto_state->bci());
   goto_state->push(result->type(), result);
   assert(goto_state->is_same(sux_state), "states must match now");
@@ -499,6 +509,7 @@
   void do_ProfileCall    (ProfileCall*     x);
   void do_ProfileInvoke  (ProfileInvoke*   x);
   void do_RuntimeCall    (RuntimeCall*     x);
+  void do_MemBar         (MemBar*          x);
 };
 
 
@@ -668,6 +679,7 @@
 void NullCheckVisitor::do_ProfileCall    (ProfileCall*     x) { nce()->clear_last_explicit_null_check(); }
 void NullCheckVisitor::do_ProfileInvoke  (ProfileInvoke*   x) {}
 void NullCheckVisitor::do_RuntimeCall    (RuntimeCall*     x) {}
+void NullCheckVisitor::do_MemBar         (MemBar*          x) {}
 
 
 void NullCheckEliminator::visit(Value* p) {
--- a/src/share/vm/c1/c1_Runtime1.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -201,17 +201,17 @@
     case slow_subtype_check_id:
     case fpu2long_stub_id:
     case unwind_exception_id:
+    case counter_overflow_id:
+#if defined(SPARC) || defined(PPC)
+    case handle_exception_nofpu_id:  // Unused on sparc
+#endif
+#ifdef GRAAL
     case graal_verify_pointer_id:
     case graal_unwind_exception_call_id:
     case graal_slow_subtype_check_id:
     case graal_arithmetic_frem_id:
     case graal_arithmetic_drem_id:
     case graal_set_deopt_info_id:
-#ifndef TIERED
-    case counter_overflow_id: // Not generated outside the tiered world
-#endif
-#ifdef SPARC
-    case handle_exception_nofpu_id:  // Unused on sparc
 #endif
       break;
 
@@ -421,8 +421,9 @@
     }
     bci = branch_bci + offset;
   }
-
+  assert(!HAS_PENDING_EXCEPTION, "Should not have any exceptions pending");
   osr_nm = CompilationPolicy::policy()->event(enclosing_method, method, branch_bci, bci, level, nm, THREAD);
+  assert(!HAS_PENDING_EXCEPTION, "Event handler should not throw any exceptions");
   return osr_nm;
 }
 
@@ -590,6 +591,7 @@
     continuation = exception_handler_for_pc_helper(thread, exception, pc, nm);
   }
   // Back in JAVA, use no oops DON'T safepoint
+
   // Now check to see if the nmethod we were called from is now deoptimized.
   // If so we must return to the deopt blob and deoptimize the nmethod
   if (nm != NULL && caller_is_deopted()) {
@@ -603,7 +605,6 @@
 
 JRT_ENTRY(void, Runtime1::throw_range_check_exception(JavaThread* thread, int index))
   NOT_PRODUCT(_throw_range_check_exception_count++;)
-  Events::log("throw_range_check");
   char message[jintAsStringSize];
   sprintf(message, "%d", index);
   SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_ArrayIndexOutOfBoundsException(), message);
@@ -612,7 +613,6 @@
 
 JRT_ENTRY(void, Runtime1::throw_index_exception(JavaThread* thread, int index))
   NOT_PRODUCT(_throw_index_exception_count++;)
-  Events::log("throw_index");
   char message[16];
   sprintf(message, "%d", index);
   SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_IndexOutOfBoundsException(), message);
@@ -647,7 +647,7 @@
   SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_IncompatibleClassChangeError());
 JRT_END
 
-
+#ifdef GRAAL
 JRT_ENTRY_NO_ASYNC(void, Runtime1::graal_monitorenter(JavaThread* thread, oopDesc* obj, BasicLock* lock))
   NOT_PRODUCT(_monitorenter_slowcase_cnt++;)
 #ifdef ASSERT
@@ -708,25 +708,17 @@
   }
 JRT_END
 
+#endif
+
 
 JRT_ENTRY_NO_ASYNC(void, Runtime1::monitorenter(JavaThread* thread, oopDesc* obj, BasicObjectLock* lock))
   NOT_PRODUCT(_monitorenter_slowcase_cnt++;)
-#ifdef ASSERT
-  if (TraceGraal >= 3) {
-    tty->print_cr("entered locking slow case with obj=" INTPTR_FORMAT " and lock= " INTPTR_FORMAT, obj, lock);
-  }
   if (PrintBiasedLockingStatistics) {
     Atomic::inc(BiasedLocking::slow_path_entry_count_addr());
   }
-#endif
   Handle h_obj(thread, obj);
   assert(h_obj()->is_oop(), "must be NULL or an object");
   if (UseBiasedLocking) {
-    if (UseFastLocking) {
-      assert(obj == lock->obj(), "must match");
-    } else {
-      lock->set_obj(obj);
-    }
     // Retry fast entry if bias is revoked to avoid unnecessary inflation
     ObjectSynchronizer::fast_enter(h_obj, lock->lock(), true, CHECK);
   } else {
@@ -739,14 +731,6 @@
       ObjectSynchronizer::fast_enter(h_obj, lock->lock(), false, THREAD);
     }
   }
-#ifdef ASSERT
-  if (TraceGraal >= 3) {
-    tty->print_cr("exiting locking lock state: obj=" INTPTR_FORMAT, lock->obj());
-    lock->lock()->print_on(tty);
-    tty->print_cr("");
-    tty->print_cr("done");
-  }
-#endif
 JRT_END
 
 
@@ -758,19 +742,7 @@
   EXCEPTION_MARK;
 
   oop obj = lock->obj();
-
-#ifdef DEBUG
-  if (!obj->is_oop()) {
-    ResetNoHandleMark rhm;
-    nmethod* method = thread->last_frame().cb()->as_nmethod_or_null();
-    if (method != NULL) {
-      tty->print_cr("ERROR in monitorexit in method %s wrong obj " INTPTR_FORMAT, method->name(), obj);
-    }
-    thread->print_stack_on(tty);
-    assert(false, "invalid lock object pointer dected");
-  }
-#endif
-
+  assert(obj->is_oop(), "must be NULL or an object");
   if (UseFastLocking) {
     // When using fast locking, the compiled code has already tried the fast case
     ObjectSynchronizer::slow_exit(obj, lock->lock(), THREAD);
@@ -901,11 +873,7 @@
   // Note also that in the presence of inlining it is not guaranteed
   // that caller_method() == caller_code->method()
 
-
   int bci = vfst.bci();
-
-  Events::log("patch_code @ " INTPTR_FORMAT , caller_frame.pc());
-
   Bytecodes::Code code = caller_method()->java_code_at(bci);
 
 #ifndef PRODUCT
--- a/src/share/vm/c1/c1_Runtime1.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_Runtime1.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -36,7 +36,7 @@
 // The Runtime1 holds all assembly stubs and VM
 // runtime routines needed by code code generated
 // by the Compiler1.
-
+#ifdef GRAAL
 #define RUNTIME1_STUBS(stub, last_entry) \
   stub(dtrace_object_alloc)          \
   stub(unwind_exception)             \
@@ -82,6 +82,42 @@
   stub(graal_create_out_of_bounds_exception) \
   stub(graal_generic_callback)       \
   last_entry(number_of_ids)
+#else
+#define RUNTIME1_STUBS(stub, last_entry) \
+  stub(dtrace_object_alloc)          \
+  stub(unwind_exception)             \
+  stub(forward_exception)            \
+  stub(throw_range_check_failed)       /* throws ArrayIndexOutOfBoundsException */ \
+  stub(throw_index_exception)          /* throws IndexOutOfBoundsException */ \
+  stub(throw_div0_exception)         \
+  stub(throw_null_pointer_exception) \
+  stub(register_finalizer)           \
+  stub(new_instance)                 \
+  stub(fast_new_instance)            \
+  stub(fast_new_instance_init_check) \
+  stub(new_type_array)               \
+  stub(new_object_array)             \
+  stub(new_multi_array)              \
+  stub(handle_exception_nofpu)         /* optimized version that does not preserve fpu registers */ \
+  stub(handle_exception)             \
+  stub(handle_exception_from_callee) \
+  stub(throw_array_store_exception)  \
+  stub(throw_class_cast_exception)   \
+  stub(throw_incompatible_class_change_error)   \
+  stub(slow_subtype_check)           \
+  stub(monitorenter)                 \
+  stub(monitorenter_nofpu)             /* optimized version that does not preserve fpu registers */ \
+  stub(monitorexit)                  \
+  stub(monitorexit_nofpu)              /* optimized version that does not preserve fpu registers */ \
+  stub(deoptimize)                   \
+  stub(access_field_patching)        \
+  stub(load_klass_patching)          \
+  stub(g1_pre_barrier_slow)          \
+  stub(g1_post_barrier_slow)         \
+  stub(fpu2long_stub)                \
+  stub(counter_overflow)             \
+  last_entry(number_of_ids)
+#endif
 
 #define DECLARE_STUB_ID(x)       x ## _id ,
 #define DECLARE_LAST_STUB_ID(x)  x
@@ -160,12 +196,12 @@
   static void throw_class_cast_exception(JavaThread* thread, oopDesc* object);
   static void throw_incompatible_class_change_error(JavaThread* thread);
   static void throw_array_store_exception(JavaThread* thread, oopDesc* object);
-
+  static void monitorenter(JavaThread* thread, oopDesc* obj, BasicObjectLock* lock);
+  static void monitorexit (JavaThread* thread, BasicObjectLock* lock);
+#ifdef GRAAL
   static void graal_monitorenter(JavaThread* thread, oopDesc* obj, BasicLock* lock);
   static void graal_monitorexit (JavaThread* thread, oopDesc* obj, BasicLock* lock);
-
-  static void monitorenter(JavaThread* thread, oopDesc* obj, BasicObjectLock* lock);
-  static void monitorexit (JavaThread* thread, BasicObjectLock* lock);
+#endif
 
   static void deoptimize(JavaThread* thread);
 
--- a/src/share/vm/c1/c1_ValueMap.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_ValueMap.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -125,6 +125,7 @@
             // otherwise it is possible that they are not evaluated
             f->pin(Instruction::PinGlobalValueNumbering);
           }
+          assert(x->type()->tag() == f->type()->tag(), "should have same type");
 
           return f;
 
--- a/src/share/vm/c1/c1_ValueMap.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/c1/c1_ValueMap.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -200,6 +200,7 @@
   void do_ProfileCall    (ProfileCall*     x) { /* nothing to do */ }
   void do_ProfileInvoke  (ProfileInvoke*   x) { /* nothing to do */ };
   void do_RuntimeCall    (RuntimeCall*     x) { /* nothing to do */ };
+  void do_MemBar         (MemBar*          x) { /* nothing to do */ };
 };
 
 
--- a/src/share/vm/ci/bcEscapeAnalyzer.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/ci/bcEscapeAnalyzer.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -359,7 +359,7 @@
       case Bytecodes::_nop:
         break;
       case Bytecodes::_aconst_null:
-        state.apush(empty_map);
+        state.apush(unknown_obj);
         break;
       case Bytecodes::_iconst_m1:
       case Bytecodes::_iconst_0:
@@ -392,6 +392,8 @@
         if (tag.is_long() || tag.is_double()) {
           // Only longs and doubles use 2 stack slots.
           state.lpush();
+        } else if (tag.basic_type() == T_OBJECT) {
+          state.apush(unknown_obj);
         } else {
           state.spush();
         }
--- a/src/share/vm/ci/ciEnv.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/ci/ciEnv.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -290,6 +290,20 @@
   // Return state of appropriate compilability
   int compilable() { return _compilable; }
 
+  const char* retry_message() const {  // maps _compilable to a short description; NULL for MethodCompilable
+    switch (_compilable) {
+      case ciEnv::MethodCompilable_not_at_tier:
+        return "retry at different tier";
+      case ciEnv::MethodCompilable_never:
+        return "not retryable";
+      case ciEnv::MethodCompilable:
+        return NULL;
+      default:
+        ShouldNotReachHere();
+        return NULL;  // reached only if ShouldNotReachHere() returns; presumably present so every path returns -- confirm
+    }
+  }
+
   bool break_at_compile() { return _break_at_compile; }
   void set_break_at_compile(bool z) { _break_at_compile = z; }
 
--- a/src/share/vm/ci/ciField.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/ci/ciField.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -306,20 +306,15 @@
   return type;
 }
 
-bool ciField::will_link(ciInstanceKlass* accessing_klass,
-                        Bytecodes::Code bc) {
-  VM_ENTRY_MARK;
-  return will_link_from_vm(accessing_klass, bc);
-}
 
 // ------------------------------------------------------------------
 // ciField::will_link
 //
 // Can a specific access to this field be made without causing
 // link errors?
-bool ciField::will_link_from_vm(ciInstanceKlass* accessing_klass,
+bool ciField::will_link(ciInstanceKlass* accessing_klass,
                         Bytecodes::Code bc) {
-  Thread* THREAD = Thread::current();
+  VM_ENTRY_MARK;
   if (_offset == -1) {
     // at creation we couldn't link to our holder so we need to
     // maintain that stance, otherwise there's no safe way to use this
--- a/src/share/vm/ci/ciInstanceKlass.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/ci/ciInstanceKlass.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -54,7 +54,7 @@
   _flags = ciFlags(access_flags);
   _has_finalizer = access_flags.has_finalizer();
   _has_subklass = ik->subklass() != NULL;
-  _init_state = (instanceKlass::ClassState)ik->get_init_state();
+  _init_state = ik->init_state();
   _nonstatic_field_size = ik->nonstatic_field_size();
   _has_nonstatic_fields = ik->has_nonstatic_fields();
   _nonstatic_fields = NULL; // initialized lazily by compute_nonstatic_fields:
@@ -118,7 +118,7 @@
 void ciInstanceKlass::compute_shared_init_state() {
   GUARDED_VM_ENTRY(
     instanceKlass* ik = get_instanceKlass();
-    _init_state = (instanceKlass::ClassState)ik->get_init_state();
+    _init_state = ik->init_state();
   )
 }
 
--- a/src/share/vm/ci/ciKlass.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/ci/ciKlass.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -72,8 +72,9 @@
   if (this == that) {
     return true;
   }
-
-  //VM_ENTRY_MARK;
+#ifndef GRAAL
+  VM_ENTRY_MARK;
+#endif
   Klass* this_klass = get_Klass();
   klassOop that_klass = that->get_klassOop();
   bool result = this_klass->is_subtype_of(that_klass);
--- a/src/share/vm/ci/ciObjectFactory.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/ci/ciObjectFactory.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -87,10 +87,11 @@
   _ci_objects = new (arena) GrowableArray<ciObject*>(arena, expected_size, 0, NULL);
 
   // If the shared ci objects exist append them to this factory's objects
-
-  /*if (_shared_ci_objects != NULL) {
+#ifndef GRAAL
+  if (_shared_ci_objects != NULL) {
     _ci_objects->appendAll(_shared_ci_objects);
-  }*/
+  }
+#endif
 
   _unloaded_methods = new (arena) GrowableArray<ciMethod*>(arena, 4, 0, NULL);
   _unloaded_klasses = new (arena) GrowableArray<ciKlass*>(arena, 8, 0, NULL);
@@ -219,15 +220,16 @@
 
 
 ciSymbol* ciObjectFactory::get_symbol(Symbol* key) {
-  /*vmSymbols::SID sid = vmSymbols::find_sid(key);
+#ifndef GRAAL
+  vmSymbols::SID sid = vmSymbols::find_sid(key);
   if (sid != vmSymbols::NO_SID) {
     // do not pollute the main cache with it
-    ciSymbol* result = vm_symbol_at(sid);
-    assert(result != NULL, "");
-    return result;
-  }*/
+    return vm_symbol_at(sid);
+  }
 
-  //assert(vmSymbols::find_sid(key) == vmSymbols::NO_SID, "");
+  assert(vmSymbols::find_sid(key) == vmSymbols::NO_SID, "");
+#endif
+
   ciSymbol* s = new (arena()) ciSymbol(key, vmSymbols::NO_SID);
   _symbols->push(s);
   return s;
--- a/src/share/vm/ci/ciTypeFlow.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/ci/ciTypeFlow.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1589,7 +1589,7 @@
   _next = NULL;
   _on_work_list = false;
   _backedge_copy = false;
-  _exception_entry = false;
+  _has_monitorenter = false;
   _trap_bci = -1;
   _trap_index = 0;
   df_init();
@@ -2182,6 +2182,10 @@
         !head->is_clonable_exit(lp))
       continue;
 
+    // Avoid BoxLock merge.
+    if (EliminateNestedLocks && head->has_monitorenter())
+      continue;
+
     // check not already cloned
     if (head->backedge_copy_count() != 0)
       continue;
@@ -2322,6 +2326,10 @@
     // Watch for bailouts.
     if (failing())  return;
 
+    if (str.cur_bc() == Bytecodes::_monitorenter) {
+      block->set_has_monitorenter();
+    }
+
     if (res) {
 
       // We have encountered a trap.  Record it in this block.
--- a/src/share/vm/ci/ciTypeFlow.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/ci/ciTypeFlow.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -544,15 +544,19 @@
     // Has this block been cloned for a loop backedge?
     bool                             _backedge_copy;
 
+    // This block is the entry to an irreducible loop.
+    bool                             _irreducible_entry;
+
+    // This block contains a monitorenter bytecode.
+    bool                             _has_monitorenter;
+
     // A pointer used for our internal work list
+    bool                             _on_work_list;      // on the work list
     Block*                           _next;
-    bool                             _on_work_list;      // on the work list
     Block*                           _rpo_next;          // Reverse post order list
 
     // Loop info
     Loop*                            _loop;              // nearest loop
-    bool                             _irreducible_entry; // entry to irreducible loop
-    bool                             _exception_entry;   // entry to exception handler
 
     ciBlock*     ciblock() const     { return _ciblock; }
     StateVector* state() const     { return _state; }
@@ -689,6 +693,8 @@
     bool   is_loop_head() const          { return _loop && _loop->head() == this; }
     void   set_irreducible_entry(bool c) { _irreducible_entry = c; }
     bool   is_irreducible_entry() const  { return _irreducible_entry; }
+    void   set_has_monitorenter()        { _has_monitorenter = true; }
+    bool   has_monitorenter() const      { return _has_monitorenter; }
     bool   is_visited() const            { return has_pre_order(); }
     bool   is_post_visited() const       { return has_post_order(); }
     bool   is_clonable_exit(Loop* lp);
--- a/src/share/vm/classfile/classFileParser.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/classFileParser.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -45,6 +45,7 @@
 #include "oops/methodOop.hpp"
 #include "oops/symbol.hpp"
 #include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/perfData.hpp"
 #include "runtime/reflection.hpp"
@@ -1050,7 +1051,7 @@
 
 class FieldAllocationCount: public ResourceObj {
  public:
-  unsigned int count[MAX_FIELD_ALLOCATION_TYPE];
+  u2 count[MAX_FIELD_ALLOCATION_TYPE];
 
   FieldAllocationCount() {
     for (int i = 0; i < MAX_FIELD_ALLOCATION_TYPE; i++) {
@@ -1060,6 +1061,8 @@
 
   FieldAllocationType update(bool is_static, BasicType type) {
     FieldAllocationType atype = basic_type_to_atype(is_static, type);
+    // Make sure there is no overflow with injected fields.
+    assert(count[atype] < 0xFFFF, "More than 65535 fields");
     count[atype]++;
     return atype;
   }
@@ -1070,7 +1073,7 @@
                                               constantPoolHandle cp, bool is_interface,
                                               FieldAllocationCount *fac,
                                               objArrayHandle* fields_annotations,
-                                              int* java_fields_count_ptr, TRAPS) {
+                                              u2* java_fields_count_ptr, TRAPS) {
   ClassFileStream* cfs = stream();
   typeArrayHandle nullHandle;
   cfs->guarantee_more(2, CHECK_(nullHandle));  // length
@@ -2639,8 +2642,11 @@
                                                     TempNewSymbol& parsed_name,
                                                     bool verify,
                                                     TRAPS) {
-  // So that JVMTI can cache class file in the state before retransformable agents
-  // have modified it
+  // When a retransformable agent is attached, JVMTI caches the
+  // class bytes that existed before the first retransformation.
+  // If RedefineClasses() was used before the retransformable
+  // agent attached, then the cached class bytes may not be the
+  // original class bytes.
   unsigned char *cached_class_file_bytes = NULL;
   jint cached_class_file_length;
 
@@ -2660,6 +2666,25 @@
   _max_bootstrap_specifier_index = -1;
 
   if (JvmtiExport::should_post_class_file_load_hook()) {
+    // Get the cached class file bytes (if any) from the class that
+    // is being redefined or retransformed. We use jvmti_thread_state()
+    // instead of JvmtiThreadState::state_for(jt) so we don't allocate
+    // a JvmtiThreadState any earlier than necessary. This will help
+    // avoid the bug described by 7126851.
+    JvmtiThreadState *state = jt->jvmti_thread_state();
+    if (state != NULL) {
+      KlassHandle *h_class_being_redefined =
+                     state->get_class_being_redefined();
+      if (h_class_being_redefined != NULL) {
+        instanceKlassHandle ikh_class_being_redefined =
+          instanceKlassHandle(THREAD, (*h_class_being_redefined)());
+        cached_class_file_bytes =
+          ikh_class_being_redefined->get_cached_class_file_bytes();
+        cached_class_file_length =
+          ikh_class_being_redefined->get_cached_class_file_len();
+      }
+    }
+
     unsigned char* ptr = cfs->buffer();
     unsigned char* end_ptr = cfs->buffer() + cfs->length();
 
@@ -2843,7 +2868,7 @@
       local_interfaces = parse_interfaces(cp, itfs_len, class_loader, protection_domain, _class_name, CHECK_(nullHandle));
     }
 
-    int java_fields_count = 0;
+    u2 java_fields_count = 0;
     // Fields (offsets are filled in later)
     FieldAllocationCount fac;
     objArrayHandle fields_annotations;
--- a/src/share/vm/classfile/classFileParser.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/classFileParser.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -91,7 +91,7 @@
                                constantPoolHandle cp, bool is_interface,
                                FieldAllocationCount *fac,
                                objArrayHandle* fields_annotations,
-                               int* java_fields_count_ptr, TRAPS);
+                               u2* java_fields_count_ptr, TRAPS);
 
   // Method parsing
   methodHandle parse_method(constantPoolHandle cp, bool is_interface,
--- a/src/share/vm/classfile/classLoader.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/classLoader.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -566,7 +566,7 @@
   ClassPathEntry* e = _first_entry;
   while (e != NULL) {
     // assume zip entries have been canonicalized
-	if (strcmp(entry->name(), e->name()) == 0) {
+    if (strcmp(entry->name(), e->name()) == 0) {
       return true;
     }
     e = e->next();
--- a/src/share/vm/classfile/dictionary.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/dictionary.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -618,7 +618,8 @@
   ResourceMark rm;
   HandleMark   hm;
 
-  tty->print_cr("Java system dictionary (classes=%d)", number_of_entries());
+  tty->print_cr("Java system dictionary (table_size=%d, classes=%d)",
+                 table_size(), number_of_entries());
   tty->print_cr("^ indicates that initiating loader is different from "
                 "defining loader");
 
--- a/src/share/vm/classfile/javaClasses.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/javaClasses.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1347,7 +1347,13 @@
     return _backtrace();
   }
 
-  inline void push(methodOop method, short bci, TRAPS) {
+  inline void push(methodOop method, int bci, TRAPS) {
+    // Smear the -1 bci to 0 since the array only holds unsigned
+    // shorts.  The later line number lookup would just smear the -1
+    // to a 0 even if it could be recorded.
+    if (bci == SynchronizationEntryBCI) bci = 0;
+    assert(bci == (jushort)bci, "doesn't fit");
+
     if (_index >= trace_chunk_size) {
       methodHandle mhandle(THREAD, method);
       expand(CHECK);
@@ -1574,8 +1580,13 @@
   int chunk_count = 0;
 
   for (;!st.at_end(); st.next()) {
-    // add element
-    bcis->ushort_at_put(chunk_count, st.bci());
+    // Add entry and smear the -1 bci to 0 since the array only holds
+    // unsigned shorts.  The later line number lookup would just smear
+    // the -1 to a 0 even if it could be recorded.
+    int bci = st.bci();
+    if (bci == SynchronizationEntryBCI) bci = 0;
+    assert(bci == (jushort)bci, "doesn't fit");
+    bcis->ushort_at_put(chunk_count, bci);
     methods->obj_at_put(chunk_count, st.method());
 
     chunk_count++;
--- a/src/share/vm/classfile/symbolTable.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/symbolTable.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -204,6 +204,24 @@
   return s;
 }
 
+// Returns the address of the SymbolTable entry's literal slot that holds
+// exactly this Symbol* (pointer identity), or NULL when no entry matches.
+// Creates no new symbols and does not increment the reference count.
+Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
+  unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
+  int index = the_table()->hash_to_index(hash);
+
+  for (HashtableEntry<Symbol*>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
+    if (e->hash() == hash) {  // hash filter before the pointer comparison
+      Symbol* literal_sym = e->literal();
+      if (sym == literal_sym) {  // same Symbol object, not merely equal bytes
+        return e->literal_addr();
+      }
+    }
+  }
+  return NULL;  // sym is not present in this bucket chain
+}
+
 // Suggestion: Push unicode-based lookup all the way into the hashing
 // and probing logic, so there is no need for convert_to_utf8 until
 // an actual new Symbol* is created.
--- a/src/share/vm/classfile/symbolTable.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/symbolTable.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -144,6 +144,9 @@
 
   static void release(Symbol* sym);
 
+  // Look up the address of the literal in the SymbolTable for this Symbol*
+  static Symbol** lookup_symbol_addr(Symbol* sym);
+
   // jchar (utf16) version of lookups
   static Symbol* lookup_unicode(const jchar* name, int len, TRAPS);
   static Symbol* lookup_only_unicode(const jchar* name, int len, unsigned int& hash);
--- a/src/share/vm/classfile/systemDictionary.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/systemDictionary.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -64,6 +64,9 @@
 
 
 int         SystemDictionary::_number_of_modifications = 0;
+int         SystemDictionary::_sdgeneration               = 0;
+const int   SystemDictionary::_primelist[_prime_array_size] = {1009,2017,4049,5051,10103,
+              20201,40423,99991};
 
 oop         SystemDictionary::_system_loader_lock_obj     =  NULL;
 
@@ -190,8 +193,7 @@
 // Forwards to resolve_instance_class_or_null
 
 klassOop SystemDictionary::resolve_or_null(Symbol* class_name, Handle class_loader, Handle protection_domain, TRAPS) {
-  // (tw) May we do this?
-  //assert(!THREAD->is_Compiler_thread(), "Can not load classes with the Compiler thread");
+  assert(!THREAD->is_Compiler_thread(), "Can not load classes with the Compiler thread");
   if (FieldType::is_array(class_name)) {
     return resolve_array_class_or_null(class_name, class_loader, protection_domain, CHECK_NULL);
   } else if (FieldType::is_obj(class_name)) {
@@ -1179,8 +1181,8 @@
 
 klassOop SystemDictionary::find_shared_class(Symbol* class_name) {
   if (shared_dictionary() != NULL) {
-    unsigned int d_hash = dictionary()->compute_hash(class_name, Handle());
-    int d_index = dictionary()->hash_to_index(d_hash);
+    unsigned int d_hash = shared_dictionary()->compute_hash(class_name, Handle());
+    int d_index = shared_dictionary()->hash_to_index(d_hash);
     return shared_dictionary()->find_shared_class(d_index, d_hash, class_name);
   } else {
     return NULL;
@@ -1751,7 +1753,21 @@
   placeholders()->oops_do(blk);
 }
 
-
+// Calculate a "good" system dictionary size for the predicted or current
+// loaded class count, aiming at classcount/_average_depth_goal buckets.
+int SystemDictionary::calculate_systemdictionary_size(int classcount) {
+  int newsize = _old_default_sdsize;  // result when classcount <= 0 or DumpSharedSpaces is set
+  if ((classcount > 0)  && !DumpSharedSpaces) {
+    int desiredsize = classcount/_average_depth_goal;
+    for (newsize = _primelist[_sdgeneration]; _sdgeneration < _prime_array_size -1;
+         newsize = _primelist[++_sdgeneration]) {  // advances the static _sdgeneration as it scans
+      if (desiredsize <=  newsize) {
+        break;  // first prime, from the current generation on, that is large enough
+      }
+    }  // falls out with the last _primelist entry if none was large enough
+  }
+  return newsize;
+}
 bool SystemDictionary::do_unloading(BoolObjectClosure* is_alive) {
   bool result = dictionary()->do_unloading(is_alive);
   constraints()->purge_loader_constraints(is_alive);
@@ -1874,7 +1890,8 @@
   // Allocate arrays
   assert(dictionary() == NULL,
          "SystemDictionary should only be initialized once");
-  _dictionary          = new Dictionary(_nof_buckets);
+  _sdgeneration        = 0;
+  _dictionary          = new Dictionary(calculate_systemdictionary_size(PredictedLoadedClassCount));
   _placeholders        = new PlaceholderTable(_nof_buckets);
   _number_of_modifications = 0;
   _loader_constraints  = new LoaderConstraintTable(_loader_constraint_size);
@@ -2132,6 +2149,12 @@
     }
   }
 
+  // Assign a classid if one has not already been assigned.  The
+  // counter does not need to be atomically incremented since this
+  // is only done while holding the SystemDictionary_lock.
+  // All loaded classes get a unique ID.
+  TRACE_INIT_ID(k);
+
   // Check for a placeholder. If there, remove it and make a
   // new system dictionary entry.
   placeholders()->find_and_remove(p_index, p_hash, name, class_loader, THREAD);
@@ -2346,9 +2369,8 @@
   if (spe == NULL || spe->property_oop() == NULL) {
     spe = NULL;
     // Must create lots of stuff here, but outside of the SystemDictionary lock.
-    // (tw) May we do this?
-	//if (THREAD->is_Compiler_thread())
-    //  return NULL;              // do not attempt from within compiler
+    if (THREAD->is_Compiler_thread())
+      return NULL;              // do not attempt from within compiler
     bool for_invokeGeneric = (name_id != vmSymbols::VM_SYMBOL_ENUM_NAME(invokeExact_name));
     bool found_on_bcp = false;
     Handle mt = find_method_handle_type(signature, accessing_klass,
--- a/src/share/vm/classfile/systemDictionary.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/systemDictionary.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -386,6 +386,8 @@
   // loaders.  Returns "true" iff something was unloaded.
   static bool do_unloading(BoolObjectClosure* is_alive);
 
+  static int calculate_systemdictionary_size(int loadedclasses);
+
   // Applies "f->do_oop" to all root oops in the system dictionary.
   static void oops_do(OopClosure* f);
 
@@ -578,12 +580,20 @@
     _loader_constraint_size = 107,                     // number of entries in constraint table
     _resolution_error_size  = 107,                     // number of entries in resolution error table
     _invoke_method_size     = 139,                     // number of entries in invoke method table
-    _nof_buckets            = 1009                     // number of buckets in hash table
+    _nof_buckets            = 1009,                    // number of buckets in hash table for placeholders
+    _old_default_sdsize     = 1009,                    // backward compat for system dictionary size
+    _prime_array_size       = 8,                       // array of primes for system dictionary size
+    _average_depth_goal     = 3                        // goal for lookup length
   };
 
 
   // Static variables
 
+  // hashtable sizes for system dictionary to allow growth
+  // prime numbers for system dictionary size
+  static int                     _sdgeneration;
+  static const int               _primelist[_prime_array_size];
+
   // Hashtable holding loaded classes.
   static Dictionary*            _dictionary;
 
--- a/src/share/vm/classfile/vmSymbols.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/classfile/vmSymbols.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -369,6 +369,7 @@
   template(run_method_name,                           "run")                                      \
   template(exit_method_name,                          "exit")                                     \
   template(add_method_name,                           "add")                                      \
+  template(remove_method_name,                        "remove")                                   \
   template(parent_name,                               "parent")                                   \
   template(threads_name,                              "threads")                                  \
   template(groups_name,                               "groups")                                   \
@@ -381,6 +382,7 @@
   template(finalize_method_name,                      "finalize")                                 \
   template(reference_lock_name,                       "lock")                                     \
   template(reference_discovered_name,                 "discovered")                               \
+  template(run_finalization_name,                     "runFinalization")                          \
   template(run_finalizers_on_exit_name,               "runFinalizersOnExit")                      \
   template(uncaughtException_name,                    "uncaughtException")                        \
   template(dispatchUncaughtException_name,            "dispatchUncaughtException")                \
--- a/src/share/vm/code/compiledIC.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/code/compiledIC.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -165,7 +165,6 @@
                    instruction_address(), method->print_value_string(), entry);
   }
 
-  Events::log("compiledIC " INTPTR_FORMAT " --> megamorphic " INTPTR_FORMAT, this, (address)method());
   // We can't check this anymore. With lazy deopt we could have already
   // cleaned this IC entry before we even return. This is possible if
   // we ran out of space in the inline cache buffer trying to do the
@@ -529,7 +528,7 @@
   NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
 
   assert(method_holder->data()    == 0           || method_holder->data()    == (intptr_t)callee(), "a) MT-unsafe modification of inline cache");
-// XXX GRAAL : ??
+// TODO(tw): Check what the correct assert is for Graal.
 #ifndef GRAAL
   assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, "b) MT-unsafe modification of inline cache");
 #endif
--- a/src/share/vm/code/nmethod.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/code/nmethod.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -463,6 +463,7 @@
   _speculatively_disconnected = 0;
   _has_unsafe_access          = 0;
   _has_method_handle_invokes  = 0;
+  _lazy_critical_native       = 0;
   _marked_for_deoptimization  = 0;
   _lock_count                 = 0;
   _stack_traversal_mark       = 0;
@@ -707,7 +708,6 @@
       xtty->tail("print_native_nmethod");
     }
   }
-  Events::log("Create nmethod " INTPTR_FORMAT, this);
 }
 
 // For dtrace wrappers
@@ -784,7 +784,6 @@
       xtty->tail("print_dtrace_nmethod");
     }
   }
-  Events::log("Create nmethod " INTPTR_FORMAT, this);
 }
 #endif // def HAVE_DTRACE_H
 
@@ -903,13 +902,6 @@
   if (printnmethods || PrintDebugInfo || PrintRelocations || PrintDependencies || PrintExceptionHandlers) {
     print_nmethod(printnmethods);
   }
-
-  // Note: Do not verify in here as the CodeCache_lock is
-  //       taken which would conflict with the CompiledIC_lock
-  //       which taken during the verification of call sites.
-  //       (was bug - gri 10/25/99)
-
-  Events::log("Create nmethod " INTPTR_FORMAT, this);
 }
 
 
@@ -1415,7 +1407,7 @@
   assert_locked_or_safepoint(CodeCache_lock);
 
   // completely deallocate this method
-  EventMark m("flushing nmethod " INTPTR_FORMAT " %s", this, "");
+  Events::log(JavaThread::current(), "flushing nmethod " INTPTR_FORMAT, this);
   if (PrintMethodFlushing) {
     tty->print_cr("*flushing nmethod %3d/" INTPTR_FORMAT ". Live blobs:" UINT32_FORMAT "/Free CodeCache:" SIZE_FORMAT "Kb",
         _compile_id, this, CodeCache::nof_blobs(), CodeCache::unallocated_capacity()/1024);
--- a/src/share/vm/code/nmethod.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/code/nmethod.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -177,6 +177,7 @@
   // set during construction
   unsigned int _has_unsafe_access:1;         // May fault due to unsafe access.
   unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes?
+  unsigned int _lazy_critical_native:1;      // Lazy JNI critical native
 
   // Protected by Patching_lock
   unsigned char _state;                      // {alive, not_entrant, zombie, unloaded}
@@ -432,7 +433,10 @@
   void  set_has_method_handle_invokes(bool z)     { _has_method_handle_invokes = z; }
 
   bool  is_speculatively_disconnected() const     { return _speculatively_disconnected; }
-  void  set_speculatively_disconnected(bool z)     { _speculatively_disconnected = z; }
+  void  set_speculatively_disconnected(bool z)    { _speculatively_disconnected = z; }
+
+  bool  is_lazy_critical_native() const           { return _lazy_critical_native; }
+  void  set_lazy_critical_native(bool z)          { _lazy_critical_native = z; }
 
   int   comp_level() const                        { return _comp_level; }
 
--- a/src/share/vm/compiler/compileBroker.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/compiler/compileBroker.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,6 +47,7 @@
 #ifdef GRAAL
 #include "graal/graalCompiler.hpp"
 #endif
+#include "utilities/events.hpp"
 #ifdef COMPILER1
 #include "c1/c1_Compiler.hpp"
 #endif
@@ -192,6 +193,44 @@
 GrowableArray<CompilerThread*>* CompileBroker::_method_threads = NULL;
 
 
+class CompilationLog : public StringEventLog {  // StringEventLog named "Compilation events"
+ public:
+  CompilationLog() : StringEventLog("Compilation events") {
+  }
+
+  void log_compile(JavaThread* thread, CompileTask* task) {  // logs the task's print_compilation() line
+    StringLogMessage lm;
+    stringStream msg = lm.stream();  // stream obtained from lm; lm is what gets logged below
+    // msg.time_stamp().update_to(tty->time_stamp().ticks());
+    task->print_compilation(&msg, true);  // second argument is short_form
+    log(thread, "%s", (const char*)lm);
+  }
+
+  void log_nmethod(JavaThread* thread, nmethod* nm) {  // logs compile id, "%" if OSR, nmethod address and code range
+    log(thread, "nmethod %d%s " INTPTR_FORMAT " code ["INTPTR_FORMAT ", " INTPTR_FORMAT "]",
+        nm->compile_id(), nm->is_osr_method() ? "%" : "",
+        nm, nm->code_begin(), nm->code_end());
+  }
+
+  void log_failure(JavaThread* thread, CompileTask* task, const char* reason, const char* retry_message) {
+    StringLogMessage lm;
+    lm.print("%4d   COMPILE SKIPPED: %s", task->compile_id(), reason);  // compile id followed by the failure reason
+    if (retry_message != NULL) {
+      lm.append(" (%s)", retry_message);  // optional retry hint in parentheses
+    }
+    lm.print("\n");  // NOTE(review): if print() overwrites rather than appends, this discards the message built above - confirm against StringLogMessage::print
+    log(thread, "%s", (const char*)lm);
+  }
+};
+
+static CompilationLog* _compilation_log = NULL;
+
+void compileBroker_init() {  // creates the file-static _compilation_log, only when LogEvents is set
+  if (LogEvents) {
+    _compilation_log = new CompilationLog();
+  }  // otherwise _compilation_log stays NULL
+}
+
 CompileTaskWrapper::CompileTaskWrapper(CompileTask* task) {
   CompilerThread* thread = CompilerThread::current();
   thread->set_task(task);
@@ -329,8 +368,12 @@
 
 // ------------------------------------------------------------------
 // CompileTask::print_compilation_impl
-void CompileTask::print_compilation_impl(outputStream* st, methodOop method, int compile_id, int comp_level, bool is_osr_method, int osr_bci, bool is_blocking, const char* msg) {
-  st->print("%7d ", (int) st->time_stamp().milliseconds());  // print timestamp
+void CompileTask::print_compilation_impl(outputStream* st, methodOop method, int compile_id, int comp_level,
+                                         bool is_osr_method, int osr_bci, bool is_blocking,
+                                         const char* msg, bool short_form) {
+  if (!short_form) {
+    st->print("%7d ", (int) st->time_stamp().milliseconds());  // print timestamp
+  }
   st->print("%4d ", compile_id);    // print compilation number
 
   // For unloaded methods the transition to zombie occurs after the
@@ -373,7 +416,9 @@
   if (msg != NULL) {
     st->print("   %s", msg);
   }
-  st->cr();
+  if (!short_form) {
+    st->cr();
+  }
 }
 
 // ------------------------------------------------------------------
@@ -429,12 +474,12 @@
 
 // ------------------------------------------------------------------
 // CompileTask::print_compilation
-void CompileTask::print_compilation(outputStream* st) {
+void CompileTask::print_compilation(outputStream* st, bool short_form) {
   oop rem = JNIHandles::resolve(method_handle());
   assert(rem != NULL && rem->is_method(), "must be");
   methodOop method = (methodOop) rem;
   bool is_osr_method = osr_bci() != InvocationEntryBci;
-  print_compilation_impl(st, method, compile_id(), comp_level(), is_osr_method, osr_bci(), is_blocking());
+  print_compilation_impl(st, method, compile_id(), comp_level(), is_osr_method, osr_bci(), is_blocking(), NULL, short_form);
 }
 
 // ------------------------------------------------------------------
@@ -682,15 +727,15 @@
   // Set the interface to the current compiler(s).
   int c1_count = CompilationPolicy::policy()->compiler_count(CompLevel_simple);
   int c2_count = CompilationPolicy::policy()->compiler_count(CompLevel_full_optimization);
-#ifdef GRAAL
+#if defined(GRAAL)
   _compilers[0] = new GraalCompiler();
-#else
-#ifdef COMPILER1
+  c1_count = 0;
+  c2_count = 0;
+#elif defined(COMPILER1)
   if (c1_count > 0) {
     _compilers[0] = new Compiler();
   }
 #endif // COMPILER1
-#endif
 
 #ifdef COMPILER2
   if (c2_count > 0) {
@@ -862,23 +907,23 @@
     // Note that this only sets the JavaThread _priority field, which by
     // definition is limited to Java priorities and not OS priorities.
     // The os-priority is set in the CompilerThread startup code itself
+
     java_lang_Thread::set_priority(thread_oop(), NearMaxPriority);
-    // CLEANUP PRIORITIES: This -if- statement hids a bug whereby the compiler
-    // threads never have their OS priority set.  The assumption here is to
-    // enable the Performance group to do flag tuning, figure out a suitable
-    // CompilerThreadPriority, and then remove this 'if' statement (and
-    // comment) and unconditionally set the priority.
+
+    // Note that we cannot call os::set_priority because it expects Java
+    // priorities and we are *explicitly* using OS priorities so that it's
+    // possible to set the compiler thread priority higher than any Java
+    // thread.
 
-    // Compiler Threads should be at the highest Priority
-    if ( CompilerThreadPriority != -1 )
-      os::set_native_priority( compiler_thread, CompilerThreadPriority );
-    else
-      os::set_native_priority( compiler_thread, os::java_to_os_priority[NearMaxPriority]);
-
-      // Note that I cannot call os::set_priority because it expects Java
-      // priorities and I am *explicitly* using OS priorities so that it's
-      // possible to set the compiler thread priority higher than any Java
-      // thread.
+    int native_prio = CompilerThreadPriority;
+    if (native_prio == -1) {
+      if (UseCriticalCompilerThreadPriority) {
+        native_prio = os::java_to_os_priority[CriticalPriority];
+      } else {
+        native_prio = os::java_to_os_priority[NearMaxPriority];
+      }
+    }
+    os::set_native_priority(compiler_thread, native_prio);
 
     java_lang_Thread::set_daemon(thread_oop());
 
@@ -886,6 +931,7 @@
     Threads::add(compiler_thread);
     Thread::start(compiler_thread);
   }
+
   // Let go of Threads_lock before yielding
   os::yield(); // make sure that the compiler thread is started early (especially helpful on SOLARIS)
 
@@ -899,7 +945,7 @@
 // Initialize the compilation queue
 void CompileBroker::init_compiler_threads(int c1_compiler_count, int c2_compiler_count) {
   EXCEPTION_MARK;
-#if !defined(ZERO) && !defined(SHARK)
+#if !defined(ZERO) && !defined(SHARK) && !defined(GRAAL)
   assert(c2_compiler_count > 0 || c1_compiler_count > 0, "No compilers?");
 #endif // !ZERO && !SHARK
   if (c2_compiler_count > 0) {
@@ -969,7 +1015,7 @@
                                         methodHandle hot_method,
                                         int hot_count,
                                         const char* comment,
-                                        TRAPS) {
+                                        Thread* thread) {
   // do nothing if compiler thread(s) is not available
   if (!_initialized ) {
     return;
@@ -1037,6 +1083,14 @@
   if (instanceRefKlass::owns_pending_list_lock(JavaThread::current())) {
     return;
   }
+#ifdef GRAAL
+  if (!JavaThread::current()->is_compiling()) {
+    method->set_queued_for_compilation();
+    GraalCompiler::instance()->compile_method(method, osr_bci, is_compile_blocking(method, osr_bci));
+  } else {
+    // Recursive compile request => ignore.
+  }
+#else
 
   // Outputs from the following MutexLocker block:
   CompileTask* task     = NULL;
@@ -1045,15 +1099,7 @@
 
   // Acquire our lock.
   {
-    MutexLocker locker(queue->lock(), THREAD);
-
-    if (JavaThread::current()->is_compiling() && !BackgroundCompilation) {
-#ifdef GRAAL
-      TRACE_graal_1("Recursive compile %s!", method->name_and_sig_as_C_string());
-#endif
-      method->set_not_compilable();
-      return;
-    }
+    MutexLocker locker(queue->lock(), thread);
 
     // Make sure the method has not slipped into the queues since
     // last we checked; note that those checks were "fast bail-outs".
@@ -1119,24 +1165,13 @@
     // and in that case it's best to protect both the testing (here) of
     // these bits, and their updating (here and elsewhere) under a
     // common lock.
-#ifndef GRAAL
     task = create_compile_task(queue,
                                compile_id, method,
                                osr_bci, comp_level,
                                hot_method, hot_count, comment,
                                blocking);
-#endif
   }
 
-#ifdef GRAAL
-  if (!JavaThread::current()->is_compiling()) {
-    method->set_queued_for_compilation();
-    GraalCompiler::instance()->compile_method(method, osr_bci, blocking);
-  } else {
-    // Recursive compile request => ignore.
-  }
-#endif
-#ifndef GRAAL
   if (blocking) {
     wait_for_completion(task);
   }
@@ -1147,7 +1182,7 @@
 nmethod* CompileBroker::compile_method(methodHandle method, int osr_bci,
                                        int comp_level,
                                        methodHandle hot_method, int hot_count,
-                                       const char* comment, TRAPS) {
+                                       const char* comment, Thread* THREAD) {
   // make sure arguments make sense
   assert(method->method_holder()->klass_part()->oop_is_instance(), "not an instance method");
   assert(osr_bci == InvocationEntryBci || (0 <= osr_bci && osr_bci < method->code_size()), "bci out of range");
@@ -1201,10 +1236,10 @@
   assert(!HAS_PENDING_EXCEPTION, "No exception should be present");
   // some prerequisites that are compiler specific
   if (compiler(comp_level)->is_c2() || compiler(comp_level)->is_shark()) {
-    method->constants()->resolve_string_constants(CHECK_0);
+    method->constants()->resolve_string_constants(CHECK_AND_CLEAR_NULL);
     // Resolve all classes seen in the signature of the method
     // we are compiling.
-    methodOopDesc::load_signature_classes(method, CHECK_0);
+    methodOopDesc::load_signature_classes(method, CHECK_AND_CLEAR_NULL);
   }
 
   // If the method is native, do the lookup in the thread requesting
@@ -1258,7 +1293,7 @@
       return NULL;
     }
   } else {
-    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, comment, CHECK_0);
+    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, comment, THREAD);
   }
 
   // return requested nmethod
@@ -1676,6 +1711,10 @@
   CompilerThread* thread = CompilerThread::current();
   ResourceMark rm(thread);
 
+  if (LogEvents) {
+    _compilation_log->log_compile(thread, task);
+  }
+
   // Common flags.
   uint compile_id = task->compile_id();
   int osr_bci = task->osr_bci();
@@ -1744,22 +1783,30 @@
       ci_env.record_method_not_compilable("compile failed", !TieredCompilation);
     }
 
+    // Copy this bit to the enclosing block:
+    compilable = ci_env.compilable();
+
     if (ci_env.failing()) {
-      // Copy this bit to the enclosing block:
-      compilable = ci_env.compilable();
+      const char* retry_message = ci_env.retry_message();
+      if (_compilation_log != NULL) {
+        _compilation_log->log_failure(thread, task, ci_env.failure_reason(), retry_message);
+      }
       if (PrintCompilation) {
-        const char* reason = ci_env.failure_reason();
-        if (compilable == ciEnv::MethodCompilable_not_at_tier) {
-          tty->print_cr("%4d   COMPILE SKIPPED: %s (retry at different tier)", compile_id, reason);
-        } else if (compilable == ciEnv::MethodCompilable_never) {
-          tty->print_cr("%4d   COMPILE SKIPPED: %s (not retryable)", compile_id, reason);
-        } else if (compilable == ciEnv::MethodCompilable) {
-          tty->print_cr("%4d   COMPILE SKIPPED: %s", compile_id, reason);
+        tty->print("%4d   COMPILE SKIPPED: %s", compile_id, ci_env.failure_reason());
+        if (retry_message != NULL) {
+          tty->print(" (%s)", retry_message);
         }
+        tty->cr();
       }
     } else {
       task->mark_success();
       task->set_num_inlined_bytecodes(ci_env.num_inlined_bytecodes());
+      if (_compilation_log != NULL) {
+        nmethod* code = task->code();
+        if (code != NULL) {
+          _compilation_log->log_nmethod(thread, code);
+        }
+      }
     }
   }
   pop_jni_handle_block();
--- a/src/share/vm/compiler/compileBroker.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/compiler/compileBroker.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -98,12 +98,16 @@
   void         set_prev(CompileTask* prev)       { _prev = prev; }
 
 private:
-  static void  print_compilation_impl(outputStream* st, methodOop method, int compile_id, int comp_level, bool is_osr_method = false, int osr_bci = -1, bool is_blocking = false, const char* msg = NULL);
+  static void  print_compilation_impl(outputStream* st, methodOop method, int compile_id, int comp_level,
+                                      bool is_osr_method = false, int osr_bci = -1, bool is_blocking = false,
+                                      const char* msg = NULL, bool short_form = false);
 
 public:
-  void         print_compilation(outputStream* st = tty);
+  void         print_compilation(outputStream* st = tty, bool short_form = false);
   static void  print_compilation(outputStream* st, const nmethod* nm, const char* msg = NULL) {
-    print_compilation_impl(st, nm->method(), nm->compile_id(), nm->comp_level(), nm->is_osr_method(), nm->is_osr_method() ? nm->osr_entry_bci() : -1, /*is_blocking*/ false, msg);
+    print_compilation_impl(st, nm->method(), nm->compile_id(), nm->comp_level(),
+                           nm->is_osr_method(), nm->is_osr_method() ? nm->osr_entry_bci() : -1, /*is_blocking*/ false,
+                           msg);
   }
 
   static void  print_inlining(outputStream* st, ciMethod* method, int inline_level, int bci, const char* msg = NULL);
@@ -333,7 +337,7 @@
                                   methodHandle hot_method,
                                   int hot_count,
                                   const char* comment,
-                                  TRAPS);
+                                  Thread* thread);
   static CompileQueue* compile_queue(int comp_level) {
     if (is_c2_compile(comp_level)) return _c2_method_queue;
     if (is_c1_compile(comp_level)) return _c1_method_queue;
@@ -363,7 +367,7 @@
                                  int comp_level,
                                  methodHandle hot_method,
                                  int hot_count,
-                                 const char* comment, TRAPS);
+                                 const char* comment, Thread* thread);
 
   static void compiler_thread_loop();
 
--- a/src/share/vm/compiler/oopMap.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/compiler/oopMap.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -255,7 +255,7 @@
   if(om_count() > 0) {
     OopMap* last = at(om_count()-1);
     if (last->offset() == map->offset() ) {
-      fatal(err_msg("OopMap inserted twice (offset=%d)", last->offset()));
+      fatal("OopMap inserted twice");
     }
     if(last->offset() > map->offset()) {
       tty->print_cr( "WARNING, maps not sorted: pc[%d]=%d, pc[%d]=%d",
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -2598,7 +2598,7 @@
 AdaptiveWeightedAverage CFLS_LAB::_blocks_to_claim[]    =
   VECTOR_257(AdaptiveWeightedAverage(OldPLABWeight, (float)CMSParPromoteBlocksToClaim));
 size_t CFLS_LAB::_global_num_blocks[]  = VECTOR_257(0);
-int    CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
+uint   CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
 
 CFLS_LAB::CFLS_LAB(CompactibleFreeListSpace* cfls) :
   _cfls(cfls)
@@ -2732,7 +2732,7 @@
         // Update globals stats for num_blocks used
         _global_num_blocks[i] += (_num_blocks[i] - num_retire);
         _global_num_workers[i]++;
-        assert(_global_num_workers[i] <= (ssize_t)ParallelGCThreads, "Too big");
+        assert(_global_num_workers[i] <= ParallelGCThreads, "Too big");
         if (num_retire > 0) {
           _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
           // Reset this list.
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -336,12 +336,6 @@
                      unallocated_block() : end());
   }
 
-  // This is needed because the default implementation uses block_start()
-  // which can;t be used at certain times (for example phase 3 of mark-sweep).
-  // A better fix is to change the assertions in phase 3 of mark-sweep to
-  // use is_in_reserved(), but that is deferred since the is_in() assertions
-  // are buried through several layers of callers and are used elsewhere
-  // as well.
   bool is_in(const void* p) const {
     return used_region().contains(p);
   }
@@ -637,7 +631,7 @@
   static AdaptiveWeightedAverage
                  _blocks_to_claim  [CompactibleFreeListSpace::IndexSetSize];
   static size_t _global_num_blocks [CompactibleFreeListSpace::IndexSetSize];
-  static int    _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
+  static uint   _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
   size_t        _num_blocks        [CompactibleFreeListSpace::IndexSetSize];
 
   // Internal work method
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3779,7 +3779,7 @@
     terminator()->reset_for_reuse(active_workers);
   }
 
-  void work(int i);
+  void work(uint worker_id);
   bool should_yield() {
     return    ConcurrentMarkSweepThread::should_yield()
            && !_collector->foregroundGCIsActive()
@@ -3852,7 +3852,7 @@
 //    . if neither is available, offer termination
 // -- Terminate and return result
 //
-void CMSConcMarkingTask::work(int i) {
+void CMSConcMarkingTask::work(uint worker_id) {
   elapsedTimer _timer;
   ResourceMark rm;
   HandleMark hm;
@@ -3860,37 +3860,40 @@
   DEBUG_ONLY(_collector->verify_overflow_empty();)
 
   // Before we begin work, our work queue should be empty
-  assert(work_queue(i)->size() == 0, "Expected to be empty");
+  assert(work_queue(worker_id)->size() == 0, "Expected to be empty");
   // Scan the bitmap covering _cms_space, tracing through grey objects.
   _timer.start();
-  do_scan_and_mark(i, _cms_space);
+  do_scan_and_mark(worker_id, _cms_space);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr("Finished cms space scanning in %dth thread: %3.3f sec",
-      i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
+      worker_id, _timer.seconds());
+      // XXX: need xxx/xxx type of notation, two timers
   }
 
   // ... do the same for the _perm_space
   _timer.reset();
   _timer.start();
-  do_scan_and_mark(i, _perm_space);
+  do_scan_and_mark(worker_id, _perm_space);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr("Finished perm space scanning in %dth thread: %3.3f sec",
-      i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
+      worker_id, _timer.seconds());
+      // XXX: need xxx/xxx type of notation, two timers
   }
 
   // ... do work stealing
   _timer.reset();
   _timer.start();
-  do_work_steal(i);
+  do_work_steal(worker_id);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr("Finished work stealing in %dth thread: %3.3f sec",
-      i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
+      worker_id, _timer.seconds());
+      // XXX: need xxx/xxx type of notation, two timers
   }
   assert(_collector->_markStack.isEmpty(), "Should have been emptied");
-  assert(work_queue(i)->size() == 0, "Should have been emptied");
+  assert(work_queue(worker_id)->size() == 0, "Should have been emptied");
   // Note that under the current task protocol, the
   // following assertion is true even of the spaces
   // expanded since the completion of the concurrent
@@ -3946,7 +3949,7 @@
   // We allow that there may be no tasks to do here because
   // we are restarting after a stack overflow.
   assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
-  int nth_task = 0;
+  uint nth_task = 0;
 
   HeapWord* aligned_start = sp->bottom();
   if (sp->used_region().contains(_restart_addr)) {
@@ -5075,7 +5078,7 @@
   ParallelTaskTerminator* terminator() { return &_term; }
   int n_workers() { return _n_workers; }
 
-  void work(int i);
+  void work(uint worker_id);
 
  private:
   // Work method in support of parallel rescan ... of young gen spaces
@@ -5096,7 +5099,7 @@
 // also is passed to do_dirty_card_rescan_tasks() and to
 // do_work_steal() to select the i-th task_queue.
 
-void CMSParRemarkTask::work(int i) {
+void CMSParRemarkTask::work(uint worker_id) {
   elapsedTimer _timer;
   ResourceMark rm;
   HandleMark   hm;
@@ -5107,7 +5110,7 @@
   Par_MarkRefsIntoAndScanClosure par_mrias_cl(_collector,
     _collector->_span, _collector->ref_processor(),
     &(_collector->_markBitMap),
-    work_queue(i), &(_collector->_revisitStack));
+    work_queue(worker_id), &(_collector->_revisitStack));
 
   // Rescan young gen roots first since these are likely
   // coarsely partitioned and may, on that account, constitute
@@ -5128,15 +5131,15 @@
     assert(ect <= _collector->_eden_chunk_capacity, "out of bounds");
     assert(sct <= _collector->_survivor_chunk_capacity, "out of bounds");
 
-    do_young_space_rescan(i, &par_mrias_cl, to_space, NULL, 0);
-    do_young_space_rescan(i, &par_mrias_cl, from_space, sca, sct);
-    do_young_space_rescan(i, &par_mrias_cl, eden_space, eca, ect);
+    do_young_space_rescan(worker_id, &par_mrias_cl, to_space, NULL, 0);
+    do_young_space_rescan(worker_id, &par_mrias_cl, from_space, sca, sct);
+    do_young_space_rescan(worker_id, &par_mrias_cl, eden_space, eca, ect);
 
     _timer.stop();
     if (PrintCMSStatistics != 0) {
       gclog_or_tty->print_cr(
         "Finished young gen rescan work in %dth thread: %3.3f sec",
-        i, _timer.seconds());
+        worker_id, _timer.seconds());
     }
   }
 
@@ -5158,7 +5161,7 @@
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr(
       "Finished remaining root rescan work in %dth thread: %3.3f sec",
-      i, _timer.seconds());
+      worker_id, _timer.seconds());
   }
 
   // ---------- rescan dirty cards ------------
@@ -5167,26 +5170,26 @@
 
   // Do the rescan tasks for each of the two spaces
   // (cms_space and perm_space) in turn.
-  // "i" is passed to select the "i-th" task_queue
-  do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
-  do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
+  // "worker_id" is passed to select the task_queue for "worker_id"
+  do_dirty_card_rescan_tasks(_cms_space, worker_id, &par_mrias_cl);
+  do_dirty_card_rescan_tasks(_perm_space, worker_id, &par_mrias_cl);
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr(
       "Finished dirty card rescan work in %dth thread: %3.3f sec",
-      i, _timer.seconds());
+      worker_id, _timer.seconds());
   }
 
   // ---------- steal work from other threads ...
   // ---------- ... and drain overflow list.
   _timer.reset();
   _timer.start();
-  do_work_steal(i, &par_mrias_cl, _collector->hash_seed(i));
+  do_work_steal(worker_id, &par_mrias_cl, _collector->hash_seed(worker_id));
   _timer.stop();
   if (PrintCMSStatistics != 0) {
     gclog_or_tty->print_cr(
       "Finished work stealing in %dth thread: %3.3f sec",
-      i, _timer.seconds());
+      worker_id, _timer.seconds());
   }
 }
 
@@ -5207,8 +5210,8 @@
   SequentialSubTasksDone* pst = space->par_seq_tasks();
   assert(pst->valid(), "Uninitialized use?");
 
-  int nth_task = 0;
-  int n_tasks  = pst->n_tasks();
+  uint nth_task = 0;
+  uint n_tasks  = pst->n_tasks();
 
   HeapWord *start, *end;
   while (!pst->is_task_claimed(/* reference */ nth_task)) {
@@ -5220,12 +5223,12 @@
     } else if (nth_task == 0) {
       start = space->bottom();
       end   = chunk_array[nth_task];
-    } else if (nth_task < (jint)chunk_top) {
+    } else if (nth_task < (uint)chunk_top) {
       assert(nth_task >= 1, "Control point invariant");
       start = chunk_array[nth_task - 1];
       end   = chunk_array[nth_task];
     } else {
-      assert(nth_task == (jint)chunk_top, "Control point invariant");
+      assert(nth_task == (uint)chunk_top, "Control point invariant");
       start = chunk_array[chunk_top - 1];
       end   = space->top();
     }
@@ -5288,7 +5291,7 @@
 
   SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
   assert(pst->valid(), "Uninitialized use?");
-  int nth_task = 0;
+  uint nth_task = 0;
   const int alignment = CardTableModRefBS::card_size * BitsPerWord;
   MemRegion span = sp->used_region();
   HeapWord* start_addr = span.start();
@@ -5591,6 +5594,7 @@
     GenCollectedHeap::StrongRootsScope srs(gch);
     workers->run_task(&tsk);
   } else {
+    ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
     GenCollectedHeap::StrongRootsScope srs(gch);
     tsk.work(0);
   }
@@ -5605,6 +5609,8 @@
   ResourceMark rm;
   HandleMark   hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
+  ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
+
   MarkRefsIntoAndScanClosure
     mrias_cl(_span, ref_processor(), &_markBitMap, &_modUnionTable,
              &_markStack, &_revisitStack, this,
@@ -5736,26 +5742,26 @@
                      CMSParKeepAliveClosure* keep_alive,
                      int* seed);
 
-  virtual void work(int i);
+  virtual void work(uint worker_id);
 };
 
-void CMSRefProcTaskProxy::work(int i) {
+void CMSRefProcTaskProxy::work(uint worker_id) {
   assert(_collector->_span.equals(_span), "Inconsistency in _span");
   CMSParKeepAliveClosure par_keep_alive(_collector, _span,
                                         _mark_bit_map,
                                         &_collector->_revisitStack,
-                                        work_queue(i));
+                                        work_queue(worker_id));
   CMSParDrainMarkingStackClosure par_drain_stack(_collector, _span,
                                                  _mark_bit_map,
                                                  &_collector->_revisitStack,
-                                                 work_queue(i));
+                                                 work_queue(worker_id));
   CMSIsAliveClosure is_alive_closure(_span, _mark_bit_map);
-  _task.work(i, is_alive_closure, par_keep_alive, par_drain_stack);
+  _task.work(worker_id, is_alive_closure, par_keep_alive, par_drain_stack);
   if (_task.marks_oops_alive()) {
-    do_work_steal(i, &par_drain_stack, &par_keep_alive,
-                  _collector->hash_seed(i));
-  }
-  assert(work_queue(i)->size() == 0, "work_queue should be empty");
+    do_work_steal(worker_id, &par_drain_stack, &par_keep_alive,
+                  _collector->hash_seed(worker_id));
+  }
+  assert(work_queue(worker_id)->size() == 0, "work_queue should be empty");
   assert(_collector->_overflow_list == NULL, "non-empty _overflow_list");
 }
 
@@ -5769,9 +5775,9 @@
       _task(task)
   { }
 
-  virtual void work(int i)
+  virtual void work(uint worker_id)
   {
-    _task.work(i);
+    _task.work(worker_id);
   }
 };
 
@@ -6086,7 +6092,11 @@
   _inter_sweep_timer.reset();
   _inter_sweep_timer.start();
 
-  update_time_of_last_gc(os::javaTimeMillis());
+  // We need to use a monotonically non-decreasing time in ms
+  // or we will see time-warp warnings and os::javaTimeMillis()
+  // does not guarantee monotonicity.
+  jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
+  update_time_of_last_gc(now);
 
   // NOTE on abstract state transitions:
   // Mutators allocate-live and/or mark the mod-union table dirty
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -75,10 +75,25 @@
   set_name("Concurrent Mark-Sweep GC Thread");
 
   if (os::create_thread(this, os::cgc_thread)) {
-    // XXX: need to set this to low priority
-    // unless "agressive mode" set; priority
-    // should be just less than that of VMThread.
-    os::set_priority(this, NearMaxPriority);
+    // An old comment here said: "Priority should be just less
+    // than that of VMThread".  Since the VMThread runs at
+    // NearMaxPriority, the old comment was inaccurate, but
+    // changing the default priority to NearMaxPriority-1
+    // could change current behavior, so the default of
+    // NearMaxPriority stays in place.
+    //
+    // Note that there's a possibility of the VMThread
+    // starving if UseCriticalCMSThreadPriority is on.
+    // That won't happen on Solaris for various reasons,
+    // but may well happen on non-Solaris platforms.
+    int native_prio;
+    if (UseCriticalCMSThreadPriority) {
+      native_prio = os::java_to_os_priority[CriticalPriority];
+    } else {
+      native_prio = os::java_to_os_priority[NearMaxPriority];
+    }
+    os::set_native_priority(this, native_prio);
+
     if (!DisableStartThread) {
       os::start_thread(this);
     }
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,6 +48,8 @@
 
 #ifndef PRODUCT
 bool CSetChooserCache::verify() {
+  guarantee(false, "CSetChooserCache::verify(): don't call this any more");
+
   int index = _first;
   HeapRegion *prev = NULL;
   for (int i = 0; i < _occupancy; ++i) {
@@ -75,6 +77,8 @@
 #endif // PRODUCT
 
 void CSetChooserCache::insert(HeapRegion *hr) {
+  guarantee(false, "CSetChooserCache::insert(): don't call this any more");
+
   assert(!is_full(), "cache should not be empty");
   hr->calc_gc_efficiency();
 
@@ -104,6 +108,9 @@
 }
 
 HeapRegion *CSetChooserCache::remove_first() {
+  guarantee(false, "CSetChooserCache::remove_first(): "
+                   "don't call this any more");
+
   if (_occupancy > 0) {
     assert(_cache[_first] != NULL, "cache should have at least one region");
     HeapRegion *ret = _cache[_first];
@@ -118,16 +125,35 @@
   }
 }
 
-static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) {
+// Even though we don't use the GC efficiency in our heuristics as
+// much as we used to, we still order according to GC efficiency. This
+// will cause regions with a lot of live objects and large RSets to
+// end up at the end of the array. Given that we might skip collecting
+// the last few old regions, if after a few mixed GCs the remaining
+// have reclaimable bytes under a certain threshold, the hope is that
+// the ones we'll skip are ones with both large RSets and a lot of
+// live objects, not the ones with just a lot of live objects if we
+// ordered according to the amount of reclaimable bytes per region.
+static int orderRegions(HeapRegion* hr1, HeapRegion* hr2) {
   if (hr1 == NULL) {
-    if (hr2 == NULL) return 0;
-    else return 1;
+    if (hr2 == NULL) {
+      return 0;
+    } else {
+      return 1;
+    }
   } else if (hr2 == NULL) {
     return -1;
   }
-  if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1;
-  else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1;
-  else return 0;
+
+  double gc_eff1 = hr1->gc_efficiency();
+  double gc_eff2 = hr2->gc_efficiency();
+  if (gc_eff1 > gc_eff2) {
+    return -1;
+  } if (gc_eff1 < gc_eff2) {
+    return 1;
+  } else {
+    return 0;
+  }
 }
 
 static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
@@ -151,51 +177,61 @@
   //
   _markedRegions((ResourceObj::set_allocation_type((address)&_markedRegions,
                                              ResourceObj::C_HEAP),
-                  100),
-                 true),
-  _curMarkedIndex(0),
-  _numMarkedRegions(0),
-  _unmarked_age_1_returned_as_new(false),
-  _first_par_unreserved_idx(0)
-{}
-
-
+                  100), true /* C_Heap */),
+    _curr_index(0), _length(0),
+    _regionLiveThresholdBytes(0), _remainingReclaimableBytes(0),
+    _first_par_unreserved_idx(0) {
+  _regionLiveThresholdBytes =
+    HeapRegion::GrainBytes * (size_t) G1OldCSetRegionLiveThresholdPercent / 100;
+}
 
 #ifndef PRODUCT
 bool CollectionSetChooser::verify() {
+  guarantee(_length >= 0, err_msg("_length: %d", _length));
+  guarantee(0 <= _curr_index && _curr_index <= _length,
+            err_msg("_curr_index: %d _length: %d", _curr_index, _length));
   int index = 0;
-  guarantee(_curMarkedIndex <= _numMarkedRegions,
-            "_curMarkedIndex should be within bounds");
-  while (index < _curMarkedIndex) {
-    guarantee(_markedRegions.at(index++) == NULL,
-              "all entries before _curMarkedIndex should be NULL");
+  size_t sum_of_reclaimable_bytes = 0;
+  while (index < _curr_index) {
+    guarantee(_markedRegions.at(index) == NULL,
+              "all entries before _curr_index should be NULL");
+    index += 1;
   }
   HeapRegion *prev = NULL;
-  while (index < _numMarkedRegions) {
+  while (index < _length) {
     HeapRegion *curr = _markedRegions.at(index++);
     guarantee(curr != NULL, "Regions in _markedRegions array cannot be NULL");
     int si = curr->sort_index();
     guarantee(!curr->is_young(), "should not be young!");
+    guarantee(!curr->isHumongous(), "should not be humongous!");
     guarantee(si > -1 && si == (index-1), "sort index invariant");
     if (prev != NULL) {
-      guarantee(orderRegions(prev, curr) != 1, "regions should be sorted");
+      guarantee(orderRegions(prev, curr) != 1,
+                err_msg("GC eff prev: %1.4f GC eff curr: %1.4f",
+                        prev->gc_efficiency(), curr->gc_efficiency()));
     }
+    sum_of_reclaimable_bytes += curr->reclaimable_bytes();
     prev = curr;
   }
-  return _cache.verify();
+  guarantee(sum_of_reclaimable_bytes == _remainingReclaimableBytes,
+            err_msg("reclaimable bytes inconsistent, "
+                    "remaining: "SIZE_FORMAT" sum: "SIZE_FORMAT,
+                    _remainingReclaimableBytes, sum_of_reclaimable_bytes));
+  return true;
 }
 #endif
 
-void
-CollectionSetChooser::fillCache() {
-  while (!_cache.is_full() && (_curMarkedIndex < _numMarkedRegions)) {
-    HeapRegion* hr = _markedRegions.at(_curMarkedIndex);
+void CollectionSetChooser::fillCache() {
+  guarantee(false, "fillCache: don't call this any more");
+
+  while (!_cache.is_full() && (_curr_index < _length)) {
+    HeapRegion* hr = _markedRegions.at(_curr_index);
     assert(hr != NULL,
            err_msg("Unexpected NULL hr in _markedRegions at index %d",
-                   _curMarkedIndex));
-    _curMarkedIndex += 1;
+                   _curr_index));
+    _curr_index += 1;
     assert(!hr->is_young(), "should not be young!");
-    assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant");
+    assert(hr->sort_index() == _curr_index-1, "sort_index invariant");
     _markedRegions.at_put(hr->sort_index(), NULL);
     _cache.insert(hr);
     assert(!_cache.is_empty(), "cache should not be empty");
@@ -203,9 +239,7 @@
   assert(verify(), "cache should be consistent");
 }
 
-void
-CollectionSetChooser::sortMarkedHeapRegions() {
-  guarantee(_cache.is_empty(), "cache should be empty");
+void CollectionSetChooser::sortMarkedHeapRegions() {
   // First trim any unused portion of the top in the parallel case.
   if (_first_par_unreserved_idx > 0) {
     if (G1PrintParCleanupStats) {
@@ -217,43 +251,78 @@
     _markedRegions.trunc_to(_first_par_unreserved_idx);
   }
   _markedRegions.sort(orderRegions);
-  assert(_numMarkedRegions <= _markedRegions.length(), "Requirement");
-  assert(_numMarkedRegions == 0
-         || _markedRegions.at(_numMarkedRegions-1) != NULL,
-         "Testing _numMarkedRegions");
-  assert(_numMarkedRegions == _markedRegions.length()
-         || _markedRegions.at(_numMarkedRegions) == NULL,
-         "Testing _numMarkedRegions");
+  assert(_length <= _markedRegions.length(), "Requirement");
+  assert(_length == 0 || _markedRegions.at(_length - 1) != NULL,
+         "Testing _length");
+  assert(_length == _markedRegions.length() ||
+                        _markedRegions.at(_length) == NULL, "Testing _length");
   if (G1PrintParCleanupStats) {
-    gclog_or_tty->print_cr("     Sorted %d marked regions.", _numMarkedRegions);
+    gclog_or_tty->print_cr("     Sorted %d marked regions.", _length);
   }
-  for (int i = 0; i < _numMarkedRegions; i++) {
+  for (int i = 0; i < _length; i++) {
     assert(_markedRegions.at(i) != NULL, "Should be true by sorting!");
     _markedRegions.at(i)->set_sort_index(i);
   }
   if (G1PrintRegionLivenessInfo) {
     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Sorting");
-    for (int i = 0; i < _numMarkedRegions; ++i) {
+    for (int i = 0; i < _length; ++i) {
       HeapRegion* r = _markedRegions.at(i);
       cl.doHeapRegion(r);
     }
   }
-  assert(verify(), "should now be sorted");
+  assert(verify(), "CSet chooser verification");
 }
 
-void
-CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
+size_t CollectionSetChooser::calcMinOldCSetLength() {
+  // The min old CSet region bound is based on the maximum desired
+  // number of mixed GCs after a cycle. I.e., even if some old regions
+  // look expensive, we should add them to the CSet anyway to make
+  // sure we go through the available old regions in no more than the
+  // maximum desired number of mixed GCs.
+  //
+  // The calculation is based on the number of marked regions we added
+  // to the CSet chooser in the first place, not how many remain, so
+  // that the result is the same during all mixed GCs that follow a cycle.
+
+  const size_t region_num = (size_t) _length;
+  const size_t gc_num = (size_t) G1MaxMixedGCNum;
+  size_t result = region_num / gc_num;
+  // emulate ceiling
+  if (result * gc_num < region_num) {
+    result += 1;
+  }
+  return result;
+}
+
+size_t CollectionSetChooser::calcMaxOldCSetLength() {
+  // The max old CSet region bound is based on the threshold expressed
+  // as a percentage of the heap size. I.e., it should bound the
+  // number of old regions added to the CSet irrespective of how many
+  // of them are available.
+
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  const size_t region_num = g1h->n_regions();
+  const size_t perc = (size_t) G1OldCSetRegionThresholdPercent;
+  size_t result = region_num * perc / 100;
+  // emulate ceiling
+  if (100 * result < region_num * perc) {
+    result += 1;
+  }
+  return result;
+}
+
+void CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
   assert(!hr->isHumongous(),
          "Humongous regions shouldn't be added to the collection set");
   assert(!hr->is_young(), "should not be young!");
   _markedRegions.append(hr);
-  _numMarkedRegions++;
+  _length++;
+  _remainingReclaimableBytes += hr->reclaimable_bytes();
   hr->calc_gc_efficiency();
 }
 
-void
-CollectionSetChooser::
-prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
+void CollectionSetChooser::prepareForAddMarkedHeapRegionsPar(size_t n_regions,
+                                                             size_t chunkSize) {
   _first_par_unreserved_idx = 0;
   int n_threads = ParallelGCThreads;
   if (UseDynamicNumberOfGCThreads) {
@@ -264,7 +333,7 @@
     // or some improperly initialized variable with leads to no
     // active threads, protect against that in a product build.
     n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
-                     1);
+                     1U);
   }
   size_t max_waste = n_threads * chunkSize;
   // it should be aligned with respect to chunkSize
@@ -274,8 +343,7 @@
   _markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL);
 }
 
-jint
-CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
+jint CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
   // Don't do this assert because this can be called at a point
   // where the loop up stream will not execute again but might
   // try to claim more chunks (loop test has not been done yet).
@@ -287,83 +355,37 @@
   return res - n_regions;
 }
 
-void
-CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) {
+void CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) {
   assert(_markedRegions.at(index) == NULL, "precondition");
   assert(!hr->is_young(), "should not be young!");
   _markedRegions.at_put(index, hr);
   hr->calc_gc_efficiency();
 }
 
-void
-CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) {
-  (void)Atomic::add(inc_by, &_numMarkedRegions);
-}
-
-void
-CollectionSetChooser::clearMarkedHeapRegions(){
-  for (int i = 0; i < _markedRegions.length(); i++) {
-    HeapRegion* r =   _markedRegions.at(i);
-    if (r != NULL) r->set_sort_index(-1);
+void CollectionSetChooser::updateTotals(jint region_num,
+                                        size_t reclaimable_bytes) {
+  // Only take the lock if we actually need to update the totals.
+  if (region_num > 0) {
+    assert(reclaimable_bytes > 0, "invariant");
+    // We could have just used atomics instead of taking the
+    // lock. However, we currently don't have an atomic add for size_t.
+    MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+    _length += (int) region_num;
+    _remainingReclaimableBytes += reclaimable_bytes;
+  } else {
+    assert(reclaimable_bytes == 0, "invariant");
   }
-  _markedRegions.clear();
-  _curMarkedIndex = 0;
-  _numMarkedRegions = 0;
-  _cache.clear();
-};
-
-void
-CollectionSetChooser::updateAfterFullCollection() {
-  clearMarkedHeapRegions();
 }
 
-// if time_remaining < 0.0, then this method should try to return
-// a region, whether it fits within the remaining time or not
-HeapRegion*
-CollectionSetChooser::getNextMarkedRegion(double time_remaining,
-                                          double avg_prediction) {
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  G1CollectorPolicy* g1p = g1h->g1_policy();
-  fillCache();
-  if (_cache.is_empty()) {
-    assert(_curMarkedIndex == _numMarkedRegions,
-           "if cache is empty, list should also be empty");
-    ergo_verbose0(ErgoCSetConstruction,
-                  "stop adding old regions to CSet",
-                  ergo_format_reason("cache is empty"));
-    return NULL;
-  }
-
-  HeapRegion *hr = _cache.get_first();
-  assert(hr != NULL, "if cache not empty, first entry should be non-null");
-  double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false);
-
-  if (g1p->adaptive_young_list_length()) {
-    if (time_remaining - predicted_time < 0.0) {
-      g1h->check_if_region_is_too_expensive(predicted_time);
-      ergo_verbose2(ErgoCSetConstruction,
-                    "stop adding old regions to CSet",
-                    ergo_format_reason("predicted old region time higher than remaining time")
-                    ergo_format_ms("predicted old region time")
-                    ergo_format_ms("remaining time"),
-                    predicted_time, time_remaining);
-      return NULL;
-    }
-  } else {
-    double threshold = 2.0 * avg_prediction;
-    if (predicted_time > threshold) {
-      ergo_verbose2(ErgoCSetConstruction,
-                    "stop adding old regions to CSet",
-                    ergo_format_reason("predicted old region time higher than threshold")
-                    ergo_format_ms("predicted old region time")
-                    ergo_format_ms("threshold"),
-                    predicted_time, threshold);
-      return NULL;
+void CollectionSetChooser::clearMarkedHeapRegions() {
+  for (int i = 0; i < _markedRegions.length(); i++) {
+    HeapRegion* r = _markedRegions.at(i);
+    if (r != NULL) {
+      r->set_sort_index(-1);
     }
   }
-
-  HeapRegion *hr2 = _cache.remove_first();
-  assert(hr == hr2, "cache contents should not have changed");
-
-  return hr;
-}
+  _markedRegions.clear();
+  _curr_index = 0;
+  _length = 0;
+  _remainingReclaimableBytes = 0;
+};
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,28 +28,6 @@
 #include "gc_implementation/g1/heapRegion.hpp"
 #include "utilities/growableArray.hpp"
 
-// We need to sort heap regions by collection desirability.
-// This sorting is currently done in two "stages". An initial sort is
-// done following a cleanup pause as soon as all of the marked but
-// non-empty regions have been identified and the completely empty
-// ones reclaimed.
-// This gives us a global sort on a GC efficiency metric
-// based on predictive data available at that time. However,
-// any of these regions that are collected will only be collected
-// during a future GC pause, by which time it is possible that newer
-// data might allow us to revise and/or refine the earlier
-// pause predictions, leading to changes in expected gc efficiency
-// order. To somewhat mitigate this obsolescence, more so in the
-// case of regions towards the end of the list, which will be
-// picked later, these pre-sorted regions from the _markedRegions
-// array are not used as is, but a small prefix thereof is
-// insertion-sorted again into a small cache, based on more
-// recent remembered set information. Regions are then drawn
-// from this cache to construct the collection set at each
-// incremental GC.
-// This scheme and/or its implementation may be subject to
-// revision in the future.
-
 class CSetChooserCache VALUE_OBJ_CLASS_SPEC {
 private:
   enum {
@@ -103,24 +81,82 @@
 class CollectionSetChooser: public CHeapObj {
 
   GrowableArray<HeapRegion*> _markedRegions;
-  int _curMarkedIndex;
-  int _numMarkedRegions;
-  CSetChooserCache _cache;
+
+  // The index of the next candidate old region to be considered for
+  // addition to the CSet.
+  int _curr_index;
+
+  // The number of candidate old regions added to the CSet chooser.
+  int _length;
 
-  // True iff last collection pause ran of out new "age 0" regions, and
-  // returned an "age 1" region.
-  bool _unmarked_age_1_returned_as_new;
+  CSetChooserCache _cache;
+  jint _first_par_unreserved_idx;
 
-  jint _first_par_unreserved_idx;
+  // If a region has more live bytes than this threshold, it will not
+  // be added to the CSet chooser and will not be a candidate for
+  // collection.
+  size_t _regionLiveThresholdBytes;
+
+  // The sum of reclaimable bytes over all the regions in the CSet chooser.
+  size_t _remainingReclaimableBytes;
 
 public:
 
-  HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction);
+  // Return the region at the cursor (the next CSet candidate) without
+  // consuming it, or NULL once every candidate has been handed out.
+  HeapRegion* peek() {
+    if (_curr_index >= _length) {
+      return NULL;
+    }
+    HeapRegion* candidate = _markedRegions.at(_curr_index);
+    assert(candidate != NULL,
+           err_msg("Unexpected NULL hr in _markedRegions at index %d",
+                   _curr_index));
+    return candidate;
+  }
+
+  // Drop hr -- which must be the current candidate, i.e. what peek()
+  // returns -- from the chooser and step to the next candidate.
+  void remove_and_move_to_next(HeapRegion* hr) {
+    assert(hr != NULL, "pre-condition");
+    assert(_curr_index < _length, "pre-condition");
+    assert(_markedRegions.at(_curr_index) == hr, "pre-condition");
+    hr->set_sort_index(-1);
+    _markedRegions.at_put(_curr_index, NULL);
+    const size_t region_bytes = hr->reclaimable_bytes();
+    assert(region_bytes <= _remainingReclaimableBytes,
+           err_msg("remaining reclaimable bytes inconsistent "
+                   "from region: "SIZE_FORMAT" remaining: "SIZE_FORMAT,
+                   region_bytes, _remainingReclaimableBytes));
+    _remainingReclaimableBytes -= region_bytes;
+    _curr_index++;
+  }
 
   CollectionSetChooser();
 
   void sortMarkedHeapRegions();
   void fillCache();
+
+  // Decide whether hr becomes a candidate in the CSet chooser. Two
+  // kinds of region are turned away: humongous ones (never put in the
+  // CSet, only reclaimed during cleanup) and ones whose live bytes
+  // reach or exceed _regionLiveThresholdBytes.
+  bool shouldAdd(HeapRegion* hr) {
+    assert(hr->is_marked(), "pre-condition");
+    assert(!hr->is_young(), "should never consider young regions");
+    if (hr->isHumongous()) { return false; }
+    return hr->live_bytes() < _regionLiveThresholdBytes;
+  }
+
+  // Calculate the minimum number of old regions we'll add to the CSet
+  // during a mixed GC.
+  size_t calcMinOldCSetLength();
+
+  // Calculate the maximum number of old regions we'll add to the CSet
+  // during a mixed GC.
+  size_t calcMaxOldCSetLength();
+
+  // Serial version.
   void addMarkedHeapRegion(HeapRegion *hr);
 
   // Must be called before calls to getParMarkedHeapRegionChunk.
@@ -133,14 +169,21 @@
   // Set the marked array entry at index to hr.  Careful to claim the index
   // first if in parallel.
   void setMarkedHeapRegion(jint index, HeapRegion* hr);
-  // Atomically increment the number of claimed regions by "inc_by".
-  void incNumMarkedHeapRegions(jint inc_by);
+  // Atomically increment the number of added regions by region_num
+  // and the amount of reclaimable bytes by reclaimable_bytes.
+  void updateTotals(jint region_num, size_t reclaimable_bytes);
 
   void clearMarkedHeapRegions();
 
-  void updateAfterFullCollection();
+  // Return the number of candidate regions that remain to be collected.
+  size_t remainingRegions() { return _length - _curr_index; }
 
-  bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; }
+  // Determine whether the CSet chooser has more candidate regions or not.
+  bool isEmpty() { return remainingRegions() == 0; }
+
+  // Return the reclaimable bytes that remain to be collected on
+  // all the candidate regions in the CSet chooser.
+  size_t remainingReclaimableBytes () { return _remainingReclaimableBytes; }
 
   // Returns true if the used portion of "_markedRegions" is properly
   // sorted, otherwise asserts false.
@@ -148,9 +191,17 @@
   bool verify(void);
   bool regionProperlyOrdered(HeapRegion* r) {
     int si = r->sort_index();
-    return (si == -1) ||
-      (si > -1 && _markedRegions.at(si) == r) ||
-      (si < -1 && _cache.region_in_cache(r));
+    if (si > -1) {
+      guarantee(_curr_index <= si && si < _length,
+                err_msg("curr: %d sort index: %d: length: %d",
+                        _curr_index, si, _length));
+      guarantee(_markedRegions.at(si) == r,
+                err_msg("sort index: %d at: "PTR_FORMAT" r: "PTR_FORMAT,
+                        si, _markedRegions.at(si), r));
+    } else {
+      guarantee(si == -1, err_msg("sort index: %d", si));
+    }
+    return true;
   }
 #endif
 
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/shared/vmGCOperations.hpp"
@@ -41,8 +42,7 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
 
-//
-// CMS Bit Map Wrapper
+// Concurrent marking bit map wrapper
 
 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
   _bm((uintptr_t*)NULL,0),
@@ -52,13 +52,13 @@
   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
 
-  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
+  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
   // For now we'll just commit all of the bit map up fromt.
   // Later on we'll try to be more parsimonious with swap.
   guarantee(_virtual_space.initialize(brs, brs.size()),
-            "couldn't reseve backing store for CMS bit map");
+            "couldn't reseve backing store for concurrent marking bit map");
   assert(_virtual_space.committed_size() == brs.size(),
-         "didn't reserve backing store for all of CMS bit map?");
+         "didn't reserve backing store for all of concurrent marking bit map?");
   _bm.set_map((uintptr_t*)_virtual_space.low());
   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
          _bmWordSize, "inconsistency in bit map sizing");
@@ -103,17 +103,6 @@
   return (int) (diff >> _shifter);
 }
 
-bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
-  HeapWord* left  = MAX2(_bmStartWord, mr.start());
-  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
-  if (right > left) {
-    // Right-open interval [leftOffset, rightOffset).
-    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
-  } else {
-    return true;
-  }
-}
-
 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                              size_t    from_start_index,
                                              HeapWord* to_start_word,
@@ -183,12 +172,11 @@
 void CMMarkStack::allocate(size_t size) {
   _base = NEW_C_HEAP_ARRAY(oop, size);
   if (_base == NULL) {
-    vm_exit_during_initialization("Failed to allocate "
-                                  "CM region mark stack");
+    vm_exit_during_initialization("Failed to allocate CM region mark stack");
   }
   _index = 0;
   _capacity = (jint) size;
-  _oops_do_bound = -1;
+  _saved_index = -1;
   NOT_PRODUCT(_max_depth = 0);
 }
 
@@ -283,7 +271,6 @@
   }
 }
 
-
 CMRegionStack::CMRegionStack() : _base(NULL) {}
 
 void CMRegionStack::allocate(size_t size) {
@@ -302,6 +289,8 @@
 }
 
 void CMRegionStack::push_lock_free(MemRegion mr) {
+  guarantee(false, "push_lock_free(): don't call this any more");
+
   assert(mr.word_size() > 0, "Precondition");
   while (true) {
     jint index = _index;
@@ -325,6 +314,8 @@
 // marking / remark phases. Should only be called in tandem with
 // other lock-free pops.
 MemRegion CMRegionStack::pop_lock_free() {
+  guarantee(false, "pop_lock_free(): don't call this any more");
+
   while (true) {
     jint index = _index;
 
@@ -390,6 +381,8 @@
 #endif
 
 bool CMRegionStack::invalidate_entries_into_cset() {
+  guarantee(false, "invalidate_entries_into_cset(): don't call this any more");
+
   bool result = false;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   for (int i = 0; i < _oops_do_bound; ++i) {
@@ -426,8 +419,6 @@
     assert(newOop->is_oop(), "Expected an oop");
     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
            "only grey objects on this stack");
-    // iterate over the oops in this oop, marking and pushing
-    // the ones in CMS generation.
     newOop->oop_iterate(cl);
     if (yield_after && _cm->do_yield_check()) {
       res = false;
@@ -438,14 +429,29 @@
   return res;
 }
 
+void CMMarkStack::note_start_of_gc() {
+  // -1 is the "no GC bracket open" marker that note_end_of_gc() restores.
+  assert(_saved_index == -1, "note_start_of_gc()/end_of_gc() bracketed incorrectly");
+  const jint index_at_start = _index;
+  _saved_index = index_at_start;
+}
+
+void CMMarkStack::note_end_of_gc() {
+  // This is intentionally a guarantee, instead of an assert. If we
+  // accidentally add something to the mark stack during GC, it
+  // will be a correctness issue so it's better if we crash. We'll
+  // only check this once per GC anyway, so it won't be a performance
+  // issue in any way.
+  guarantee(_saved_index == _index,
+            err_msg("saved index: %d index: %d", _saved_index, _index));
+  _saved_index = -1;  // the value note_start_of_gc() asserts on entry
+}
+
 void CMMarkStack::oops_do(OopClosure* f) {
-  if (_index == 0) return;
-  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
-         "Bound must be set.");
-  for (int i = 0; i < _oops_do_bound; i++) {
+  assert(_saved_index == _index,
+         err_msg("saved index: %d index: %d", _saved_index, _index));
+  for (int i = 0; i < _index; i += 1) {
     f->do_oop(&_base[i]);
   }
-  _oops_do_bound = -1;
 }
 
 bool ConcurrentMark::not_yet_marked(oop obj) const {
@@ -454,12 +460,90 @@
               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 }
 
+CMRootRegions::CMRootRegions()
+  : _young_list(NULL), _cm(NULL),  // both are filled in by init()
+    _scan_in_progress(false), _should_abort(false), _next_survivor(NULL) { }
+
+void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
+  _cm = cm;
+  _young_list = g1h->young_list();  // where the survivor regions come from
+}
+
+void CMRootRegions::prepare_for_scan() {
+  assert(!scan_in_progress(), "pre-condition");
+  assert(_next_survivor == NULL, "pre-condition");
+  // Survivors are currently the only root regions. A scan is only
+  // flagged as in progress when there is at least one of them.
+  _next_survivor = _young_list->first_survivor_region();
+  _scan_in_progress = (_next_survivor != NULL);
+  _should_abort = false;
+}
+
+HeapRegion* CMRootRegions::claim_next() {
+  if (_should_abort) {
+    // If someone has set the should_abort flag, we return NULL to
+    // force the caller to bail out of their loop.
+    return NULL;
+  }
+
+  // Only survivors can be root regions for now. Unlocked first look.
+  HeapRegion* res = _next_survivor;
+  if (res != NULL) {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    // Read it again in case it changed while we were waiting for the lock.
+    res = _next_survivor;
+    if (res != NULL) {
+      if (res == _young_list->last_survivor_region()) {
+        // We just claimed the last survivor so store NULL to indicate
+        // that we're done.
+        _next_survivor = NULL;
+      } else {
+        _next_survivor = res->get_next_young_region();
+      }
+    } else {
+      // Someone else claimed the last survivor while we were trying
+      // to take the lock so nothing else to do.
+    }
+  }
+  assert(res == NULL || res->is_survivor(), "post-condition");
+  // NULL tells the caller that there is nothing (more) to claim.
+  return res;
+}
+
+void CMRootRegions::scan_finished() {
+  assert(scan_in_progress(), "pre-condition");
+  // Currently, only survivors can be root regions. Unless the scan was
+  // told to abort, claim_next() must have handed all of them out.
+  if (!_should_abort) {
+    assert(_next_survivor == NULL, "we should have claimed all survivors");
+  }
+  _next_survivor = NULL;
+  // Clear the flag and wake up wait_until_scan_finished() callers.
+  {
+    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+    _scan_in_progress = false;
+    RootRegionScan_lock->notify_all();
+  }
+}
+
+bool CMRootRegions::wait_until_scan_finished() {
+  // Returns false straight away when no scan is in progress; otherwise
+  // blocks until scan_in_progress() turns false and returns true.
+  if (!scan_in_progress()) { return false; }
+
+  MutexLockerEx ml(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
+  while (scan_in_progress()) {
+    RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
+  }
+  return true;
+}
+
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
-size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
-  return MAX2((n_par_threads + 2) / 4, (size_t)1);
+uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
+  return MAX2((n_par_threads + 2) / 4, 1U);
 }
 
 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
@@ -478,6 +562,7 @@
   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),
+
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
   _at_least_one_mark_complete(false),
@@ -486,7 +571,7 @@
   _regionStack(),
   // _finger set in set_non_marking_state
 
-  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
+  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
   // _active_tasks set in set_non_marking_state
   // _tasks set inside the constructor
   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
@@ -507,7 +592,10 @@
   _total_counting_time(0.0),
   _total_rs_scrub_time(0.0),
 
-  _parallel_workers(NULL) {
+  _parallel_workers(NULL),
+
+  _count_card_bitmaps(NULL),
+  _count_marked_bytes(NULL) {
   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
   if (verbose_level < no_verbose) {
     verbose_level = no_verbose;
@@ -538,9 +626,16 @@
   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
   satb_qs.set_buffer_size(G1SATBBufferSize);
 
+  _root_regions.init(_g1h, this);
+
   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 
+  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num);
+  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
+
+  BitMap::idx_t card_bm_size = _card_bm.size();
+
   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   _active_tasks = _max_task_num;
   for (int i = 0; i < (int) _max_task_num; ++i) {
@@ -548,10 +643,26 @@
     task_queue->initialize();
     _task_queues->register_queue(i, task_queue);
 
-    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
+    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
+    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
+
+    _tasks[i] = new CMTask(i, this,
+                           _count_marked_bytes[i],
+                           &_count_card_bitmaps[i],
+                           task_queue, _task_queues);
+
     _accum_task_vtime[i] = 0.0;
   }
 
+  // Calculate the card number for the bottom of the heap. Used
+  // in biasing indexes into the accounting card bitmaps.
+  _heap_bottom_card_num =
+    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
+                                CardTableModRefBS::card_shift);
+
+  // Clear all the liveness counting data
+  clear_all_count_data();
+
   if (ConcGCThreads > ParallelGCThreads) {
     vm_exit_during_initialization("Can't have more ConcGCThreads "
                                   "than ParallelGCThreads.");
@@ -568,7 +679,7 @@
       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
       // if both are set
 
-      _parallel_marking_threads = ConcGCThreads;
+      _parallel_marking_threads = (uint) ConcGCThreads;
       _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
@@ -589,12 +700,12 @@
       double sleep_factor =
                          (1.0 - marking_task_overhead) / marking_task_overhead;
 
-      _parallel_marking_threads = (size_t) marking_thread_num;
+      _parallel_marking_threads = (uint) marking_thread_num;
       _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = sleep_factor;
       _marking_task_overhead    = marking_task_overhead;
     } else {
-      _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
+      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
       _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
@@ -618,7 +729,7 @@
 
     guarantee(parallel_marking_threads() > 0, "peace of mind");
     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
-         (int) _max_parallel_marking_threads, false, true);
+         _max_parallel_marking_threads, false, true);
     if (_parallel_workers == NULL) {
       vm_exit_during_initialization("Failed necessary allocation.");
     } else {
@@ -691,7 +802,7 @@
   set_concurrent_marking_in_progress();
 }
 
-void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
+void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
   assert(active_tasks <= _max_task_num, "we should not have more");
 
   _active_tasks = active_tasks;
@@ -727,19 +838,10 @@
 }
 
 ConcurrentMark::~ConcurrentMark() {
-  for (int i = 0; i < (int) _max_task_num; ++i) {
-    delete _task_queues->queue(i);
-    delete _tasks[i];
-  }
-  delete _task_queues;
-  FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
+  // The ConcurrentMark instance is never freed.
+  ShouldNotReachHere();
 }
 
-// This closure is used to mark refs into the g1 generation
-// from external roots in the CMS bit map.
-// Called at the first checkpoint.
-//
-
 void ConcurrentMark::clearNextBitmap() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
@@ -779,6 +881,9 @@
     assert(!g1h->mark_in_progress(), "invariant");
   }
 
+  // Clear the liveness counting data
+  clear_all_count_data();
+
   // Repeat the asserts from above.
   guarantee(cmThread()->during_cycle(), "invariant");
   guarantee(!g1h->mark_in_progress(), "invariant");
@@ -788,7 +893,7 @@
 public:
   bool doHeapRegion(HeapRegion* r) {
     if (!r->continuesHumongous()) {
-      r->note_start_of_marking(true);
+      r->note_start_of_marking();
     }
     return false;
   }
@@ -809,6 +914,10 @@
 
   // Initialise marking structures. This has to be done in a STW phase.
   reset();
+
+  // For each region note start of marking.
+  NoteStartOfMarkHRClosure startcl;
+  g1h->heap_region_iterate(&startcl);
 }
 
 
@@ -823,10 +932,6 @@
   // every remark and we'll eventually not need to cause one.
   force_overflow_stw()->init();
 
-  // For each region note start of marking.
-  NoteStartOfMarkHRClosure startcl;
-  g1h->heap_region_iterate(&startcl);
-
   // Start Concurrent Marking weak-reference discovery.
   ReferenceProcessor* rp = g1h->ref_processor_cm();
   // enable ("weak") refs discovery
@@ -839,6 +944,8 @@
   satb_mq_set.set_active_all_threads(true, /* new active value */
                                      false /* expected_active */);
 
+  _root_regions.prepare_for_scan();
+
   // update_g1_committed() will be called at the end of an evac pause
   // when marking is on. So, it's also called at the end of the
   // initial-mark pause to update the heap end, if the heap expands
@@ -951,22 +1058,9 @@
 }
 #endif // !PRODUCT
 
-void ConcurrentMark::grayRoot(oop p) {
-  HeapWord* addr = (HeapWord*) p;
-  // We can't really check against _heap_start and _heap_end, since it
-  // is possible during an evacuation pause with piggy-backed
-  // initial-mark that the committed space is expanded during the
-  // pause without CM observing this change. So the assertions below
-  // is a bit conservative; but better than nothing.
-  assert(_g1h->g1_committed().contains(addr),
-         "address should be within the heap bounds");
-
-  if (!_nextMarkBitMap->isMarked(addr)) {
-    _nextMarkBitMap->parMark(addr);
-  }
-}
-
 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
+  guarantee(false, "grayRegionIfNecessary(): don't call this any more");
+
   // The objects on the region have already been marked "in bulk" by
   // the caller. We only need to decide whether to push the region on
   // the region stack or not.
@@ -1012,6 +1106,8 @@
 }
 
 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
+  guarantee(false, "markAndGrayObjectIfNecessary(): don't call this any more");
+
   // The object is not marked by the caller. We need to at least mark
   // it and maybe push in on the stack.
 
@@ -1048,7 +1144,7 @@
   ConcurrentMarkThread* _cmt;
 
 public:
-  void work(int worker_i) {
+  void work(uint worker_id) {
     assert(Thread::current()->is_ConcurrentGC_thread(),
            "this should only be done by a conc GC thread");
     ResourceMark rm;
@@ -1057,8 +1153,8 @@
 
     ConcurrentGCThread::stsJoin();
 
-    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
-    CMTask* the_task = _cm->task(worker_i);
+    assert(worker_id < _cm->active_tasks(), "invariant");
+    CMTask* the_task = _cm->task(worker_id);
     the_task->record_start_time();
     if (!_cm->has_aborted()) {
       do {
@@ -1076,7 +1172,7 @@
         double elapsed_time_sec = end_time_sec - start_time_sec;
         _cm->clear_has_overflown();
 
-        bool ret = _cm->do_yield_check(worker_i);
+        bool ret = _cm->do_yield_check(worker_id);
 
         jlong sleep_time_ms;
         if (!_cm->has_aborted() && the_task->has_aborted()) {
@@ -1105,7 +1201,7 @@
     ConcurrentGCThread::stsLeave();
 
     double end_vtime = os::elapsedVTime();
-    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
+    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
   }
 
   CMConcurrentMarkingTask(ConcurrentMark* cm,
@@ -1117,12 +1213,9 @@
 
 // Calculates the number of active workers for a concurrent
 // phase.
-int ConcurrentMark::calc_parallel_marking_threads() {
-
-  size_t n_conc_workers;
-  if (!G1CollectedHeap::use_parallel_gc_threads()) {
-    n_conc_workers = 1;
-  } else {
+uint ConcurrentMark::calc_parallel_marking_threads() {
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    uint n_conc_workers = 0;
     if (!UseDynamicNumberOfGCThreads ||
         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
          !ForceDynamicNumberOfGCThreads)) {
@@ -1137,9 +1230,76 @@
       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
       // that scaling has already gone into "_max_parallel_marking_threads".
     }
-  }
-  assert(n_conc_workers > 0, "Always need at least 1");
-  return (int) MAX2(n_conc_workers, (size_t) 1);
+    assert(n_conc_workers > 0, "Always need at least 1");
+    return n_conc_workers;
+  }
+  // If we are not running with any parallel GC threads we will not
+  // have spawned any marking threads either. Hence the number of
+  // concurrent workers should be 0.
+  return 0;
+}
+
+void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
+  // Apply the root region scan closure to every object in
+  // [bottom, top) of hr. Only survivors can be root regions for now.
+  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
+  G1RootRegionScanClosure root_cl(_g1h, this, worker_id);
+  const uintx prefetch_dist = PrefetchScanIntervalInBytes;
+  HeapWord* cursor = hr->bottom();
+  const HeapWord* const limit = hr->top();
+  while (cursor < limit) {
+    Prefetch::read(cursor, prefetch_dist);
+    oop obj = oop(cursor);
+    const int obj_words = obj->oop_iterate(&root_cl);  // size in words
+    assert(obj_words == obj->size(), "sanity");
+    cursor += obj_words;
+  }
+}
+
+// Gang task that drains the root regions: each worker keeps claiming
+// a region and scanning it until claim_next() returns NULL.
+class CMRootRegionScanTask : public AbstractGangTask {
+  ConcurrentMark* _cm;
+
+public:
+  CMRootRegionScanTask(ConcurrentMark* cm) :
+    AbstractGangTask("Root Region Scan"), _cm(cm) { }
+
+  void work(uint worker_id) {
+    assert(Thread::current()->is_ConcurrentGC_thread(),
+           "this should only be done by a conc GC thread");
+    CMRootRegions* const regions = _cm->root_regions();
+    for (HeapRegion* claimed = regions->claim_next();
+         claimed != NULL;
+         claimed = regions->claim_next()) {
+      _cm->scanRootRegion(claimed, worker_id);
+    }
+  }
+};
+
+void ConcurrentMark::scanRootRegions() {
+  // prepare_for_scan() only flags a scan as in progress when there is
+  // at least one root region, so without the flag there is no work.
+  if (!root_regions()->scan_in_progress()) { return; }
+
+  _parallel_marking_threads = calc_parallel_marking_threads();
+  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+         "Maximum number of marking threads exceeded");
+
+  CMRootRegionScanTask scan_task(this);
+  if (parallel_marking_threads() == 0) {
+    // No marking threads to hand the task to: scan on this thread.
+    scan_task.work(0);
+  } else {
+    const uint n_workers = MAX2(1U, parallel_marking_threads());
+    _parallel_workers->set_active_workers((int) n_workers);
+    _parallel_workers->run_task(&scan_task);
+  }
+
+  // It's possible that has_aborted() is true here without the
+  // survivor scan actually having been aborted earlier. This is OK
+  // as it's mainly used for sanity checking.
+  root_regions()->scan_finished();
+}
 
 void ConcurrentMark::markFromRoots() {
@@ -1151,24 +1311,24 @@
   // stop-the-world GC happens even as we mark in this generation.
 
   _restart_for_overflow = false;
-
-  // Parallel task terminator is set in "set_phase()".
   force_overflow_conc()->init();
 
   // _g1h has _n_par_threads
-
   _parallel_marking_threads = calc_parallel_marking_threads();
   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
     "Maximum number of marking threads exceeded");
-  _parallel_workers->set_active_workers((int)_parallel_marking_threads);
-  // Don't set _n_par_threads because it affects MT in proceess_strong_roots()
-  // and the decisions on that MT processing is made elsewhere.
-
-  assert( _parallel_workers->active_workers() > 0, "Should have been set");
-  set_phase(_parallel_workers->active_workers(), true /* concurrent */);
+
+  uint active_workers = MAX2(1U, parallel_marking_threads());
+
+  // Parallel task terminator is set in "set_phase()"
+  set_phase(active_workers, true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
   if (parallel_marking_threads() > 0) {
+    _parallel_workers->set_active_workers((int)active_workers);
+    // Don't set _n_par_threads because it affects MT in process_strong_roots()
+    // and the decision on that MT processing is made elsewhere.
+    assert(_parallel_workers->active_workers() > 0, "Should have been set");
     _parallel_workers->run_task(&markingTask);
   } else {
     markingTask.work(0);
@@ -1220,6 +1380,10 @@
       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
     }
   } else {
+    // Aggregate the per-task counting data that we have accumulated
+    // while marking.
+    aggregate_count_data();
+
     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
     // We're done with marking.
     // This is the end of  the marking cycle, we're expected all
@@ -1228,7 +1392,6 @@
                                        true /* expected_active */);
 
     if (VerifyDuringGC) {
-
       HandleMark hm;  // handle scope
       gclog_or_tty->print(" VerifyDuringGC:(after)");
       Universe::heap()->prepare_for_verify();
@@ -1258,48 +1421,41 @@
   g1p->record_concurrent_mark_remark_end();
 }
 
-#define CARD_BM_TEST_MODE 0
-
+// Used to calculate the # live objects per region
+// for verification purposes
 class CalcLiveObjectsClosure: public HeapRegionClosure {
 
   CMBitMapRO* _bm;
   ConcurrentMark* _cm;
-  bool _changed;
-  bool _yield;
-  size_t _words_done;
-  size_t _tot_live;
-  size_t _tot_used;
-  size_t _regions_done;
-  double _start_vtime_sec;
-
   BitMap* _region_bm;
   BitMap* _card_bm;
+
+  // Debugging
+  size_t _tot_words_done;
+  size_t _tot_live;
+  size_t _tot_used;
+
+  size_t _region_marked_bytes;
+
   intptr_t _bottom_card_num;
-  bool _final;
 
   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
-    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
-#if CARD_BM_TEST_MODE
-      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
-#else
-      _card_bm->par_at_put(i - _bottom_card_num, 1);
-#endif
+    assert(start_card_num <= last_card_num, "sanity");
+    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
+    BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
+
+    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+      _card_bm->par_at_put(i, 1);
     }
   }
 
 public:
-  CalcLiveObjectsClosure(bool final,
-                         CMBitMapRO *bm, ConcurrentMark *cm,
+  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
                          BitMap* region_bm, BitMap* card_bm) :
-    _bm(bm), _cm(cm), _changed(false), _yield(true),
-    _words_done(0), _tot_live(0), _tot_used(0),
-    _region_bm(region_bm), _card_bm(card_bm),_final(final),
-    _regions_done(0), _start_vtime_sec(0.0)
-  {
-    _bottom_card_num =
-      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
-               CardTableModRefBS::card_shift);
-  }
+    _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
+    _region_marked_bytes(0), _tot_words_done(0),
+    _tot_live(0), _tot_used(0),
+    _bottom_card_num(cm->heap_bottom_card_num()) { }
 
   // It takes a region that's not empty (i.e., it has at least one
   // live object in it and sets its corresponding bit on the region
@@ -1315,29 +1471,16 @@
       _region_bm->par_at_put((BitMap::idx_t) index, true);
     } else {
       // Starts humongous case: calculate how many regions are part of
-      // this humongous region and then set the bit range. It might
-      // have been a bit more efficient to look at the object that
-      // spans these humongous regions to calculate their number from
-      // the object's size. However, it's a good idea to calculate
-      // this based on the metadata itself, and not the region
-      // contents, so that this code is not aware of what goes into
-      // the humongous regions (in case this changes in the future).
+      // this humongous region and then set the bit range.
       G1CollectedHeap* g1h = G1CollectedHeap::heap();
-      size_t end_index = index + 1;
-      while (end_index < g1h->n_regions()) {
-        HeapRegion* chr = g1h->region_at(end_index);
-        if (!chr->continuesHumongous()) break;
-        end_index += 1;
-      }
+      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
+      size_t end_index = last_hr->hrs_index() + 1;
       _region_bm->par_at_put_range((BitMap::idx_t) index,
                                    (BitMap::idx_t) end_index, true);
     }
   }
 
   bool doHeapRegion(HeapRegion* hr) {
-    if (!_final && _regions_done == 0) {
-      _start_vtime_sec = os::elapsedVTime();
-    }
 
     if (hr->continuesHumongous()) {
       // We will ignore these here and process them when their
@@ -1351,48 +1494,41 @@
     }
 
     HeapWord* nextTop = hr->next_top_at_mark_start();
-    HeapWord* start   = hr->top_at_conc_mark_count();
-    assert(hr->bottom() <= start && start <= hr->end() &&
-           hr->bottom() <= nextTop && nextTop <= hr->end() &&
-           start <= nextTop,
-           "Preconditions.");
-    // Otherwise, record the number of word's we'll examine.
+    HeapWord* start   = hr->bottom();
+
+    assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
+           err_msg("Preconditions not met - "
+                   "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
+                   start, nextTop, hr->end()));
+
+    // Record the number of words we'll examine.
     size_t words_done = (nextTop - start);
+
     // Find the first marked object at or after "start".
     start = _bm->getNextMarkedWordAddress(start, nextTop);
+
     size_t marked_bytes = 0;
 
     // Below, the term "card num" means the result of shifting an address
     // by the card shift -- address 0 corresponds to card number 0.  One
     // must subtract the card num of the bottom of the heap to obtain a
     // card table index.
+
     // The first card num of the sequence of live cards currently being
     // constructed.  -1 ==> no sequence.
     intptr_t start_card_num = -1;
+
     // The last card num of the sequence of live cards currently being
     // constructed.  -1 ==> no sequence.
     intptr_t last_card_num = -1;
 
     while (start < nextTop) {
-      if (_yield && _cm->do_yield_check()) {
-        // We yielded.  It might be for a full collection, in which case
-        // all bets are off; terminate the traversal.
-        if (_cm->has_aborted()) {
-          _changed = false;
-          return true;
-        } else {
-          // Otherwise, it might be a collection pause, and the region
-          // we're looking at might be in the collection set.  We'll
-          // abandon this region.
-          return false;
-        }
-      }
       oop obj = oop(start);
       int obj_sz = obj->size();
+
       // The card num of the start of the current object.
       intptr_t obj_card_num =
         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
-
       HeapWord* obj_last = start + obj_sz - 1;
       intptr_t obj_last_card_num =
         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
@@ -1410,110 +1546,404 @@
             start_card_num = obj_card_num;
           }
         }
-#if CARD_BM_TEST_MODE
-        /*
-        gclog_or_tty->print_cr("Setting bits from %d/%d.",
-                               obj_card_num - _bottom_card_num,
-                               obj_last_card_num - _bottom_card_num);
-        */
-        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
-          _card_bm->par_at_put(j - _bottom_card_num, 1);
-        }
-#endif
       }
       // In any case, we set the last card num.
       last_card_num = obj_last_card_num;
 
       marked_bytes += (size_t)obj_sz * HeapWordSize;
+
       // Find the next marked object after this one.
       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
-      _changed = true;
     }
+
     // Handle the last range, if any.
     if (start_card_num != -1) {
       mark_card_num_range(start_card_num, last_card_num);
     }
-    if (_final) {
-      // Mark the allocated-since-marking portion...
-      HeapWord* tp = hr->top();
-      if (nextTop < tp) {
-        start_card_num =
-          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
-        last_card_num =
-          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
-        mark_card_num_range(start_card_num, last_card_num);
-        // This definitely means the region has live objects.
-        set_bit_for_region(hr);
-      }
+
+    // Mark the allocated-since-marking portion...
+    HeapWord* top = hr->top();
+    if (nextTop < top) {
+      start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
+      last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
+
+      mark_card_num_range(start_card_num, last_card_num);
+
+      // This definitely means the region has live objects.
+      set_bit_for_region(hr);
     }
 
-    hr->add_to_marked_bytes(marked_bytes);
     // Update the live region bitmap.
     if (marked_bytes > 0) {
       set_bit_for_region(hr);
     }
-    hr->set_top_at_conc_mark_count(nextTop);
+
+    // Set the marked bytes for the current region so that
+    // it can be queried by a calling verification routine
+    _region_marked_bytes = marked_bytes;
+
     _tot_live += hr->next_live_bytes();
     _tot_used += hr->used();
-    _words_done = words_done;
-
-    if (!_final) {
-      ++_regions_done;
-      if (_regions_done % 10 == 0) {
-        double end_vtime_sec = os::elapsedVTime();
-        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
-        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
-          jlong sleep_time_ms =
-            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
-          os::sleep(Thread::current(), sleep_time_ms, false);
-          _start_vtime_sec = end_vtime_sec;
-        }
-      }
-    }
+    _tot_words_done = words_done;
 
     return false;
   }
 
-  bool changed() { return _changed;  }
-  void reset()   { _changed = false; _words_done = 0; }
-  void no_yield() { _yield = false; }
-  size_t words_done() { return _words_done; }
-  size_t tot_live() { return _tot_live; }
-  size_t tot_used() { return _tot_used; }
+  size_t region_marked_bytes() const { return _region_marked_bytes; }
+
+  // Debugging
+  size_t tot_words_done() const      { return _tot_words_done; }
+  size_t tot_live() const            { return _tot_live; }
+  size_t tot_used() const            { return _tot_used; }
+};
+
+// Heap region closure used for verifying the counting data
+// that was accumulated concurrently and aggregated during
+// the remark pause. This closure is applied to the heap
+// regions during the STW cleanup pause.
+
+class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
+  ConcurrentMark* _cm;
+  CalcLiveObjectsClosure _calc_cl;
+  BitMap* _region_bm;   // Region BM to be verified
+  BitMap* _card_bm;     // Card BM to be verified
+  bool _verbose;        // verbose output?
+
+  BitMap* _exp_region_bm; // Expected Region BM values
+  BitMap* _exp_card_bm;   // Expected card BM values
+
+  int _failures;
+
+public:
+  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
+                                BitMap* region_bm,
+                                BitMap* card_bm,
+                                BitMap* exp_region_bm,
+                                BitMap* exp_card_bm,
+                                bool verbose) :
+    _cm(cm),
+    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
+    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
+    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
+    _failures(0) { }
+
+  int failures() const { return _failures; }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->continuesHumongous()) {
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed (see
+      // set_bit_for_region()). Note that we cannot rely on their
+      // associated "starts humongous" region to have their bit set to
+      // 1 since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
+      return false;
+    }
+
+    int failures = 0;
+
+    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
+    // this region and set the corresponding bits in the expected region
+    // and card bitmaps.
+    bool res = _calc_cl.doHeapRegion(hr);
+    assert(res == false, "should be continuing");
+
+    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
+                    Mutex::_no_safepoint_check_flag);
+
+    // Verify that _top_at_conc_count == ntams
+    if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
+      if (_verbose) {
+        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": top at conc count incorrect: "
+                               "expected " PTR_FORMAT ", actual: " PTR_FORMAT,
+                               hr->hrs_index(), hr->next_top_at_mark_start(),
+                               hr->top_at_conc_mark_count());
+      }
+      failures += 1;
+    }
+
+    // Verify the marked bytes for this region.
+    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
+    size_t act_marked_bytes = hr->next_marked_bytes();
+
+    // We're not OK if expected marked bytes > actual marked bytes. It means
+    // we have missed accounting some objects during the actual marking.
+    if (exp_marked_bytes > act_marked_bytes) {
+      if (_verbose) {
+        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": marked bytes mismatch: "
+                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
+                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
+      }
+      failures += 1;
+    }
+
+    // Verify the bit, for this region, in the actual and expected
+    // (which was just calculated) region bit maps.
+    // We're not OK if the bit in the calculated expected region
+    // bitmap is set and the bit in the actual region bitmap is not.
+    BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
+
+    bool expected = _exp_region_bm->at(index);
+    bool actual = _region_bm->at(index);
+    if (expected && !actual) {
+      if (_verbose) {
+        gclog_or_tty->print_cr("Region " SIZE_FORMAT ": region bitmap mismatch: "
+                               "expected: %d, actual: %d",
+                               hr->hrs_index(), expected, actual);
+      }
+      failures += 1;
+    }
+
+    // Verify that the card bit maps for the cards spanned by the current
+    // region match. We have an error if we have a set bit in the expected
+    // bit map and the corresponding bit in the actual bitmap is not set.
+
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
+    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
+
+    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
+      expected = _exp_card_bm->at(i);
+      actual = _card_bm->at(i);
+
+      if (expected && !actual) {
+        if (_verbose) {
+          gclog_or_tty->print_cr("Region " SIZE_FORMAT ": card bitmap mismatch at " SIZE_FORMAT ": "
+                                 "expected: %d, actual: %d",
+                                 hr->hrs_index(), i, expected, actual);
+        }
+        failures += 1;
+      }
+    }
+
+    if (failures > 0 && _verbose)  {
+      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
+                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
+                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
+                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
+    }
+
+    _failures += failures;
+
+    // We could stop iteration over the heap when we
+    // find the first violating region by returning true.
+    return false;
+  }
 };
 
 
-void ConcurrentMark::calcDesiredRegions() {
-  _region_bm.clear();
-  _card_bm.clear();
-  CalcLiveObjectsClosure calccl(false /*final*/,
-                                nextMarkBitMap(), this,
-                                &_region_bm, &_card_bm);
-  G1CollectedHeap *g1h = G1CollectedHeap::heap();
-  g1h->heap_region_iterate(&calccl);
-
-  do {
-    calccl.reset();
-    g1h->heap_region_iterate(&calccl);
-  } while (calccl.changed());
-}
+class G1ParVerifyFinalCountTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  BitMap* _actual_region_bm;
+  BitMap* _actual_card_bm;
+
+  uint    _n_workers;
+
+  BitMap* _expected_region_bm;
+  BitMap* _expected_card_bm;
+
+  int  _failures;
+  bool _verbose;
+
+public:
+  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
+                            BitMap* region_bm, BitMap* card_bm,
+                            BitMap* expected_region_bm, BitMap* expected_card_bm)
+    : AbstractGangTask("G1 verify final counting"),
+      _g1h(g1h), _cm(_g1h->concurrent_mark()),
+      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
+      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
+      _failures(0), _verbose(false),
+      _n_workers(0) {
+    assert(VerifyDuringGC, "don't call this otherwise");
+
+    // Use the value already set as the number of active threads
+    // in the call to run_task().
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      assert( _g1h->workers()->active_workers() > 0,
+        "Should have been previously set");
+      _n_workers = _g1h->workers()->active_workers();
+    } else {
+      _n_workers = 1;
+    }
+
+    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
+    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
+
+    _verbose = _cm->verbose_medium();
+  }
+
+  void work(uint worker_id) {
+    assert(worker_id < _n_workers, "invariant");
+
+    VerifyLiveObjectDataHRClosure verify_cl(_cm,
+                                            _actual_region_bm, _actual_card_bm,
+                                            _expected_region_bm,
+                                            _expected_card_bm,
+                                            _verbose);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      _g1h->heap_region_par_iterate_chunked(&verify_cl,
+                                            worker_id,
+                                            _n_workers,
+                                            HeapRegion::VerifyCountClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&verify_cl);
+    }
+
+    Atomic::add(verify_cl.failures(), &_failures);
+  }
+
+  int failures() const { return _failures; }
+};
+
+// Final update of count data (during cleanup).
+// Adds [top_at_count, NTAMS) to the marked bytes for each
+// region. Sets the bits in the card bitmap corresponding
+// to the interval [top_at_count, top], and sets the
+// liveness bit for each region containing live data
+// in the region bitmap.
+
+class FinalCountDataUpdateClosure: public HeapRegionClosure {
+  ConcurrentMark* _cm;
+  BitMap* _region_bm;
+  BitMap* _card_bm;
+
+  size_t _total_live_bytes;
+  size_t _total_used_bytes;
+  size_t _total_words_done;
+
+  void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
+    assert(start_idx <= last_idx, "sanity");
+
+    // Set the inclusive bit range [start_idx, last_idx].
+    // For small ranges (up to 8 cards) use a simple loop; otherwise
+    // use par_at_put_range.
+    if ((last_idx - start_idx) <= 8) {
+      for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+        _card_bm->par_set_bit(i);
+      }
+    } else {
+      assert(last_idx < _card_bm->size(), "sanity");
+      // Note BitMap::par_at_put_range() is exclusive.
+      _card_bm->par_at_put_range(start_idx, last_idx+1, true);
+    }
+  }
+
+  // It takes a region that's not empty (i.e., it has at least one
+  // live object in it and sets its corresponding bit on the region
+  // bitmap to 1. If the region is "starts humongous" it will also set
+  // to 1 the bits on the region bitmap that correspond to its
+  // associated "continues humongous" regions.
+  void set_bit_for_region(HeapRegion* hr) {
+    assert(!hr->continuesHumongous(), "should have filtered those out");
+
+    size_t index = hr->hrs_index();
+    if (!hr->startsHumongous()) {
+      // Normal (non-humongous) case: just set the bit.
+      _region_bm->par_set_bit((BitMap::idx_t) index);
+    } else {
+      // Starts humongous case: calculate how many regions are part of
+      // this humongous region and then set the bit range.
+      G1CollectedHeap* g1h = G1CollectedHeap::heap();
+      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
+      size_t end_index = last_hr->hrs_index() + 1;
+      _region_bm->par_at_put_range((BitMap::idx_t) index,
+                                   (BitMap::idx_t) end_index, true);
+    }
+  }
+
+ public:
+  FinalCountDataUpdateClosure(ConcurrentMark* cm,
+                              BitMap* region_bm,
+                              BitMap* card_bm) :
+    _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
+    _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+
+    if (hr->continuesHumongous()) {
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed (see
+      // set_bit_for_region()). Note that we cannot rely on their
+      // associated "starts humongous" region to have their bit set to
+      // 1 since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
+      return false;
+    }
+
+    HeapWord* start = hr->top_at_conc_mark_count();
+    HeapWord* ntams = hr->next_top_at_mark_start();
+    HeapWord* top   = hr->top();
+
+    assert(hr->bottom() <= start && start <= hr->end() &&
+           hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
+
+    size_t words_done = ntams - hr->bottom();
+
+    if (start < ntams) {
+      // Region was changed between remark and cleanup pauses
+      // We need to add (ntams - start) to the marked bytes
+      // for this region, and set bits for the range
+      // [ card_idx(start), card_idx(ntams) ) in the card bitmap.
+      size_t live_bytes = (ntams - start) * HeapWordSize;
+      hr->add_to_marked_bytes(live_bytes);
+
+      // Record the new top at conc count
+      hr->set_top_at_conc_mark_count(ntams);
+
+      // The setting of the bits in the card bitmap takes place below
+    }
+
+    // Mark the allocated-since-marking portion...
+    if (ntams < top) {
+      // This definitely means the region has live objects.
+      set_bit_for_region(hr);
+    }
+
+    // Now set the bits for [start, top]
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+    BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
+    set_card_bitmap_range(start_idx, last_idx);
+
+    // Set the bit for the region if it contains live data
+    if (hr->next_marked_bytes() > 0) {
+      set_bit_for_region(hr);
+    }
+
+    _total_words_done += words_done;
+    _total_used_bytes += hr->used();
+    _total_live_bytes += hr->next_marked_bytes();
+
+    return false;
+  }
+
+  size_t total_words_done() const { return _total_words_done; }
+  size_t total_live_bytes() const { return _total_live_bytes; }
+  size_t total_used_bytes() const { return _total_used_bytes; }
+};
 
 class G1ParFinalCountTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
-  CMBitMap* _bm;
-  size_t _n_workers;
+  ConcurrentMark* _cm;
+  BitMap* _actual_region_bm;
+  BitMap* _actual_card_bm;
+
+  uint    _n_workers;
+
   size_t *_live_bytes;
   size_t *_used_bytes;
-  BitMap* _region_bm;
-  BitMap* _card_bm;
+
 public:
-  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
-                      BitMap* region_bm, BitMap* card_bm)
-    : AbstractGangTask("G1 final counting"), _g1h(g1h),
-    _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
-    _n_workers(0)
-  {
+  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
+    : AbstractGangTask("G1 final counting"),
+      _g1h(g1h), _cm(_g1h->concurrent_mark()),
+      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
+      _n_workers(0) {
     // Use the value already set as the number of active threads
     // in the call to run_task().  Needed for the allocation of
     // _live_bytes and _used_bytes.
@@ -1534,33 +1964,36 @@
     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
   }
 
-  void work(int i) {
-    CalcLiveObjectsClosure calccl(true /*final*/,
-                                  _bm, _g1h->concurrent_mark(),
-                                  _region_bm, _card_bm);
-    calccl.no_yield();
+  void work(uint worker_id) {
+    assert(worker_id < _n_workers, "invariant");
+
+    FinalCountDataUpdateClosure final_update_cl(_cm,
+                                                _actual_region_bm,
+                                                _actual_card_bm);
+
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate_chunked(&calccl, i,
-                                            (int) _n_workers,
+      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
+                                            worker_id,
+                                            _n_workers,
                                             HeapRegion::FinalCountClaimValue);
     } else {
-      _g1h->heap_region_iterate(&calccl);
+      _g1h->heap_region_iterate(&final_update_cl);
     }
-    assert(calccl.complete(), "Shouldn't have yielded!");
-
-    assert((size_t) i < _n_workers, "invariant");
-    _live_bytes[i] = calccl.tot_live();
-    _used_bytes[i] = calccl.tot_used();
-  }
+
+    _live_bytes[worker_id] = final_update_cl.total_live_bytes();
+    _used_bytes[worker_id] = final_update_cl.total_used_bytes();
+  }
+
   size_t live_bytes()  {
     size_t live_bytes = 0;
-    for (size_t i = 0; i < _n_workers; ++i)
+    for (uint i = 0; i < _n_workers; ++i)
       live_bytes += _live_bytes[i];
     return live_bytes;
   }
+
   size_t used_bytes()  {
     size_t used_bytes = 0;
-    for (size_t i = 0; i < _n_workers; ++i)
+    for (uint i = 0; i < _n_workers; ++i)
       used_bytes += _used_bytes[i];
     return used_bytes;
   }
@@ -1645,18 +2078,18 @@
     AbstractGangTask("G1 note end"), _g1h(g1h),
     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
 
-  void work(int i) {
+  void work(uint worker_id) {
     double start = os::elapsedTime();
     FreeRegionList local_cleanup_list("Local Cleanup List");
     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
     HRRSCleanupTask hrrs_cleanup_task;
-    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
+    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            &hrrs_cleanup_task);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
+      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
                                             _g1h->workers()->active_workers(),
                                             HeapRegion::NoteEndClaimValue);
     } else {
@@ -1700,8 +2133,8 @@
     double end = os::elapsedTime();
     if (G1PrintParCleanupStats) {
       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
-                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
-                          i, start, end, (end-start)*1000.0,
+                          "claimed %u regions (tot = %8.3f ms, max = %8.3f ms).\n",
+                          worker_id, start, end, (end-start)*1000.0,
                           g1_note_end.regions_claimed(),
                           g1_note_end.claimed_region_time_sec()*1000.0,
                           g1_note_end.max_region_time_sec()*1000.0);
@@ -1720,12 +2153,11 @@
   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                        BitMap* region_bm, BitMap* card_bm) :
     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
-    _region_bm(region_bm), _card_bm(card_bm)
-  {}
-
-  void work(int i) {
+    _region_bm(region_bm), _card_bm(card_bm) { }
+
+  void work(uint worker_id) {
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1rs->scrub_par(_region_bm, _card_bm, i,
+      _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
                        HeapRegion::ScrubRemSetClaimValue);
     } else {
       _g1rs->scrub(_region_bm, _card_bm);
@@ -1765,30 +2197,59 @@
 
   HeapRegionRemSet::reset_for_cleanup_tasks();
 
-  g1h->set_par_threads();
-  size_t n_workers = g1h->n_par_threads();
+  uint n_workers;
 
   // Do counting once more with the world stopped for good measure.
-  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
-                                        &_region_bm, &_card_bm);
+  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
+
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    assert(g1h->check_heap_region_claim_values(
-                                               HeapRegion::InitialClaimValue),
+   assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
            "sanity check");
 
-    assert(g1h->n_par_threads() == (int) n_workers,
-      "Should not have been reset");
+    g1h->set_par_threads();
+    n_workers = g1h->n_par_threads();
+    assert(g1h->n_par_threads() == n_workers,
+           "Should not have been reset");
     g1h->workers()->run_task(&g1_par_count_task);
     // Done with the parallel phase so reset to 0.
     g1h->set_par_threads(0);
 
-    assert(g1h->check_heap_region_claim_values(
-                                             HeapRegion::FinalCountClaimValue),
+    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
            "sanity check");
   } else {
+    n_workers = 1;
     g1_par_count_task.work(0);
   }
 
+  if (VerifyDuringGC) {
+    // Verify that the counting data accumulated during marking matches
+    // that calculated by walking the marking bitmap.
+
+    // Bitmaps to hold expected values
+    BitMap expected_region_bm(_region_bm.size(), false);
+    BitMap expected_card_bm(_card_bm.size(), false);
+
+    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
+                                                 &_region_bm,
+                                                 &_card_bm,
+                                                 &expected_region_bm,
+                                                 &expected_card_bm);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      g1h->set_par_threads((int)n_workers);
+      g1h->workers()->run_task(&g1_par_verify_task);
+      // Done with the parallel phase so reset to 0.
+      g1h->set_par_threads(0);
+
+      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
+             "sanity check");
+    } else {
+      g1_par_verify_task.work(0);
+    }
+
+    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
+  }
+
   size_t known_garbage_bytes =
     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
   g1p->set_known_garbage_bytes(known_garbage_bytes);
@@ -1851,7 +2312,6 @@
                            (note_end_end - note_end_start)*1000.0);
   }
 
-
   // call below, since it affects the metric by which we sort the heap
   // regions.
   if (G1ScrubRemSets) {
@@ -1882,10 +2342,6 @@
   double end = os::elapsedTime();
   _cleanup_times.add((end - start) * 1000.0);
 
-  // G1CollectedHeap::heap()->print();
-  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
-  // G1CollectedHeap::heap()->get_gc_time_stamp());
-
   if (PrintGC || PrintGCDetails) {
     g1h->print_size_transition(gclog_or_tty,
                                start_used_bytes,
@@ -1904,6 +2360,10 @@
   // races with it goes around and waits for completeCleanup to finish.
   g1h->increment_total_collections();
 
+  // We reclaimed old regions so we should calculate the sizes to make
+  // sure we update the old gen/space data.
+  g1h->g1mm()->update_sizes();
+
   if (VerifyDuringGC) {
     HandleMark hm;  // handle scope
     gclog_or_tty->print(" VerifyDuringGC:(after)");
@@ -1982,12 +2442,11 @@
 class G1CMKeepAliveClosure: public OopClosure {
   G1CollectedHeap* _g1;
   ConcurrentMark*  _cm;
-  CMBitMap*        _bitMap;
  public:
-  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
-                       CMBitMap* bitMap) :
-    _g1(g1), _cm(cm),
-    _bitMap(bitMap) {}
+  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
+    _g1(g1), _cm(cm) {
+    assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
+  }
 
   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   virtual void do_oop(      oop* p) { do_oop_work(p); }
@@ -2003,26 +2462,25 @@
     }
 
     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
-      _bitMap->mark(addr);
+      _cm->mark_and_count(obj);
       _cm->mark_stack_push(obj);
     }
   }
 };
 
 class G1CMDrainMarkingStackClosure: public VoidClosure {
+  ConcurrentMark*               _cm;
   CMMarkStack*                  _markStack;
-  CMBitMap*                     _bitMap;
   G1CMKeepAliveClosure*         _oopClosure;
  public:
-  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
+  G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
                                G1CMKeepAliveClosure* oopClosure) :
-    _bitMap(bitMap),
+    _cm(cm),
     _markStack(markStack),
-    _oopClosure(oopClosure)
-  {}
+    _oopClosure(oopClosure) { }
 
   void do_void() {
-    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
+    _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
   }
 };
 
@@ -2101,8 +2559,7 @@
   CMTask* _task;
  public:
   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
-    _cm(cm), _task(task)
-  {}
+    _cm(cm), _task(task) { }
 
   void do_void() {
     do {
@@ -2167,13 +2624,13 @@
     AbstractGangTask("Process reference objects in parallel"),
     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
 
-  virtual void work(int i) {
-    CMTask* marking_task = _cm->task(i);
+  virtual void work(uint worker_id) {
+    CMTask* marking_task = _cm->task(worker_id);
     G1CMIsAliveClosure g1_is_alive(_g1h);
     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
 
-    _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
+    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
   }
 };
 
@@ -2199,8 +2656,8 @@
     AbstractGangTask("Enqueue reference objects in parallel"),
     _enq_task(enq_task) { }
 
-  virtual void work(int i) {
-    _enq_task.work(i);
+  virtual void work(uint worker_id) {
+    _enq_task.work(worker_id);
   }
 };
 
@@ -2241,14 +2698,14 @@
     rp->setup_policy(clear_all_soft_refs);
     assert(_markStack.isEmpty(), "mark stack should be empty");
 
-    G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
+    G1CMKeepAliveClosure g1_keep_alive(g1h, this);
     G1CMDrainMarkingStackClosure
-      g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
+      g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
 
     // We use the work gang from the G1CollectedHeap and we utilize all
     // the worker threads.
-    int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
-    active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
+    uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
+    active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
 
     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                               g1h->workers(), active_workers);
@@ -2312,11 +2769,11 @@
   ConcurrentMark *_cm;
 
 public:
-  void work(int worker_i) {
+  void work(uint worker_id) {
     // Since all available tasks are actually started, we should
     // only proceed if we're supposed to be actived.
-    if ((size_t)worker_i < _cm->active_tasks()) {
-      CMTask* task = _cm->task(worker_i);
+    if (worker_id < _cm->active_tasks()) {
+      CMTask* task = _cm->task(worker_id);
       task->record_start_time();
       do {
         task->do_marking_step(1000000000.0 /* something very large */,
@@ -2329,9 +2786,9 @@
     }
   }
 
-  CMRemarkTask(ConcurrentMark* cm) :
+  CMRemarkTask(ConcurrentMark* cm, int active_workers) :
     AbstractGangTask("Par Remark"), _cm(cm) {
-    _cm->terminator()->reset_for_reuse(cm->_g1h->workers()->active_workers());
+    _cm->terminator()->reset_for_reuse(active_workers);
   }
 };
 
@@ -2345,10 +2802,10 @@
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     G1CollectedHeap::StrongRootsScope srs(g1h);
     // this is remark, so we'll use up all active threads
-    int active_workers = g1h->workers()->active_workers();
+    uint active_workers = g1h->workers()->active_workers();
     if (active_workers == 0) {
       assert(active_workers > 0, "Should have been set earlier");
-      active_workers = ParallelGCThreads;
+      active_workers = (uint) ParallelGCThreads;
       g1h->workers()->set_active_workers(active_workers);
     }
     set_phase(active_workers, false /* concurrent */);
@@ -2357,17 +2814,17 @@
     // constructor and pass values of the active workers
     // through the gang in the task.
 
-    CMRemarkTask remarkTask(this);
+    CMRemarkTask remarkTask(this, active_workers);
     g1h->set_par_threads(active_workers);
     g1h->workers()->run_task(&remarkTask);
     g1h->set_par_threads(0);
   } else {
     G1CollectedHeap::StrongRootsScope srs(g1h);
     // this is remark, so we'll use up all available threads
-    int active_workers = 1;
+    uint active_workers = 1;
     set_phase(active_workers, false /* concurrent */);
 
-    CMRemarkTask remarkTask(this);
+    CMRemarkTask remarkTask(this, active_workers);
     // We will start all available threads, even if we decide that the
     // active_workers will be fewer. The extra ones will just bail out
     // immediately.
@@ -2615,18 +3072,6 @@
 // during an evacuation pause). This was a late change to the code and
 // is currently not being taken advantage of.
 
-class CMGlobalObjectClosure : public ObjectClosure {
-private:
-  ConcurrentMark* _cm;
-
-public:
-  void do_object(oop obj) {
-    _cm->deal_with_reference(obj);
-  }
-
-  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
-};
-
 void ConcurrentMark::deal_with_reference(oop obj) {
   if (verbose_high()) {
     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
@@ -2671,7 +3116,21 @@
   }
 }
 
+class CMGlobalObjectClosure : public ObjectClosure {
+private:
+  ConcurrentMark* _cm;
+
+public:
+  void do_object(oop obj) {
+    _cm->deal_with_reference(obj);
+  }
+
+  CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
+};
+
 void ConcurrentMark::drainAllSATBBuffers() {
+  guarantee(false, "drainAllSATBBuffers(): don't call this any more");
+
   CMGlobalObjectClosure oc(this);
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   satb_mq_set.set_closure(&oc);
@@ -2690,28 +3149,21 @@
   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
 }
 
-void ConcurrentMark::markPrev(oop p) {
-  // Note we are overriding the read-only view of the prev map here, via
-  // the cast.
-  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
-}
-
-void ConcurrentMark::clear(oop p) {
-  assert(p != NULL && p->is_oop(), "expected an oop");
-  HeapWord* addr = (HeapWord*)p;
-  assert(addr >= _nextMarkBitMap->startWord() ||
-         addr < _nextMarkBitMap->endWord(), "in a region");
-
-  _nextMarkBitMap->clear(addr);
-}
-
-void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
+void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
   // Note we are overriding the read-only view of the prev map here, via
   // the cast.
   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+}
+
+void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
   _nextMarkBitMap->clearRange(mr);
 }
 
+void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
+  clearRangePrevBitmap(mr);
+  clearRangeNextBitmap(mr);
+}
+
 HeapRegion*
 ConcurrentMark::claim_region(int task_num) {
   // "checkpoint" the finger
@@ -2806,6 +3258,9 @@
 }
 
 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
+  guarantee(false, "invalidate_aborted_regions_in_cset(): "
+                   "don't call this any more");
+
   bool result = false;
   for (int i = 0; i < (int)_max_task_num; ++i) {
     CMTask* the_task = _tasks[i];
@@ -2857,25 +3312,136 @@
     // ...then over the contents of the all the task queues.
     queue->oops_do(cl);
   }
-
-  // Invalidate any entries, that are in the region stack, that
-  // point into the collection set
-  if (_regionStack.invalidate_entries_into_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
-  }
-
-  // Invalidate any aborted regions, recorded in the individual CM
-  // tasks, that point into the collection set.
-  if (invalidate_aborted_regions_in_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
-  }
-
 }
 
+#ifndef PRODUCT
+enum VerifyNoCSetOopsPhase {
+  VerifyNoCSetOopsStack,
+  VerifyNoCSetOopsQueues,
+  VerifyNoCSetOopsSATBCompleted,
+  VerifyNoCSetOopsSATBThread
+};
+
+class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
+private:
+  G1CollectedHeap* _g1h;
+  VerifyNoCSetOopsPhase _phase;
+  int _info;
+
+  const char* phase_str() {
+    switch (_phase) {
+    case VerifyNoCSetOopsStack:         return "Stack";
+    case VerifyNoCSetOopsQueues:        return "Queue";
+    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
+    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
+    default:                            ShouldNotReachHere();
+    }
+    return NULL;
+  }
+
+  void do_object_work(oop obj) {
+    guarantee(!_g1h->obj_in_cs(obj),
+              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
+                      (void*) obj, phase_str(), _info));
+  }
+
+public:
+  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
+
+  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
+    _phase = phase;
+    _info = info;
+  }
+
+  virtual void do_oop(oop* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
+    do_object_work(obj);
+  }
+
+  virtual void do_oop(narrowOop* p) {
+    // We should not come across narrow oops while scanning marking
+    // stacks and SATB buffers.
+    ShouldNotReachHere();
+  }
+
+  virtual void do_object(oop obj) {
+    do_object_work(obj);
+  }
+};
+
+void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
+                                         bool verify_enqueued_buffers,
+                                         bool verify_thread_buffers,
+                                         bool verify_fingers) {
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
+  if (!G1CollectedHeap::heap()->mark_in_progress()) {
+    return;
+  }
+
+  VerifyNoCSetOopsClosure cl;
+
+  if (verify_stacks) {
+    // Verify entries on the global mark stack
+    cl.set_phase(VerifyNoCSetOopsStack);
+    _markStack.oops_do(&cl);
+
+    // Verify entries on the task queues
+    for (int i = 0; i < (int) _max_task_num; i += 1) {
+      cl.set_phase(VerifyNoCSetOopsQueues, i);
+      OopTaskQueue* queue = _task_queues->queue(i);
+      queue->oops_do(&cl);
+    }
+  }
+
+  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
+
+  // Verify entries on the enqueued SATB buffers
+  if (verify_enqueued_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
+    satb_qs.iterate_completed_buffers_read_only(&cl);
+  }
+
+  // Verify entries on the per-thread SATB buffers
+  if (verify_thread_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBThread);
+    satb_qs.iterate_thread_buffers_read_only(&cl);
+  }
+
+  if (verify_fingers) {
+    // Verify the global finger
+    HeapWord* global_finger = finger();
+    if (global_finger != NULL && global_finger < _heap_end) {
+      // The global finger always points to a heap region boundary. We
+      // use heap_region_containing_raw() to get the containing region
+      // given that the global finger could be pointing to a free region
+      // which subsequently becomes "continues humongous". If that
+      // happens, heap_region_containing() will return the bottom of the
+      // corresponding "starts humongous" region and the check below will
+      // not hold any more.
+      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
+      guarantee(global_finger == global_hr->bottom(),
+                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
+                        global_finger, HR_FORMAT_PARAMS(global_hr)));
+    }
+
+    // Verify the task fingers
+    assert(parallel_marking_threads() <= _max_task_num, "sanity");
+    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
+      CMTask* task = _tasks[i];
+      HeapWord* task_finger = task->finger();
+      if (task_finger != NULL && task_finger < _heap_end) {
+        // See above note on the global finger verification.
+        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
+        guarantee(task_finger == task_hr->bottom() ||
+                  !task_hr->in_collection_set(),
+                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
+                          task_finger, HR_FORMAT_PARAMS(task_hr)));
+      }
+    }
+  }
+}
+#endif // !PRODUCT
+
 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
   _markStack.setEmpty();
   _markStack.clear_overflow();
@@ -2896,6 +3462,192 @@
   }
 }
 
+// Aggregate the counting data that was constructed concurrently
+// with marking.
+class AggregateCountDataHRClosure: public HeapRegionClosure {
+  ConcurrentMark* _cm;
+  BitMap* _cm_card_bm;
+  size_t _max_task_num;
+
+ public:
+  AggregateCountDataHRClosure(ConcurrentMark *cm,
+                              BitMap* cm_card_bm,
+                              size_t max_task_num) :
+    _cm(cm), _cm_card_bm(cm_card_bm),
+    _max_task_num(max_task_num) { }
+
+  bool is_card_aligned(HeapWord* p) {
+    return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
+  }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->continuesHumongous()) {
+      // We will ignore these here and process them when their
+      // associated "starts humongous" region is processed.
+      // Note that we cannot rely on their associated
+      // "starts humongous" region to have their bit set to 1
+      // since, due to the region chunking in the parallel region
+      // iteration, a "continues humongous" region might be visited
+      // before its associated "starts humongous".
+      return false;
+    }
+
+    HeapWord* start = hr->bottom();
+    HeapWord* limit = hr->next_top_at_mark_start();
+    HeapWord* end = hr->end();
+
+    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
+           err_msg("Preconditions not met - "
+                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
+                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
+                   start, limit, hr->top(), hr->end()));
+
+    assert(hr->next_marked_bytes() == 0, "Precondition");
+
+    if (start == limit) {
+      // NTAMS of this region has not been set so nothing to do.
+      return false;
+    }
+
+    assert(is_card_aligned(start), "sanity");
+    assert(is_card_aligned(end), "sanity");
+
+    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
+    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
+    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
+
+    // If ntams is not card aligned then we bump the index for
+    // limit so that we get the card spanning ntams.
+    if (!is_card_aligned(limit)) {
+      limit_idx += 1;
+    }
+
+    assert(limit_idx <= end_idx, "or else use atomics");
+
+    // Aggregate the "stripe" in the count data associated with hr.
+    size_t hrs_index = hr->hrs_index();
+    size_t marked_bytes = 0;
+
+    for (int i = 0; (size_t)i < _max_task_num; i += 1) {
+      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
+      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
+
+      // Fetch the marked_bytes in this region for task i and
+      // add it to the running total for this region.
+      marked_bytes += marked_bytes_array[hrs_index];
+
+      // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
+      // into the global card bitmap.
+      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
+
+      while (scan_idx < limit_idx) {
+        assert(task_card_bm->at(scan_idx) == true, "should be");
+        _cm_card_bm->set_bit(scan_idx);
+        assert(_cm_card_bm->at(scan_idx) == true, "should be");
+
+        // BitMap::get_next_one_offset() can handle the case when
+        // its left_offset parameter is greater than its right_offset
+        // parameter. It does, however, have an early exit if
+        // left_offset == right_offset. So let's limit the value
+        // passed in for left offset here.
+        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
+        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
+      }
+    }
+
+    // Update the marked bytes for this region.
+    hr->add_to_marked_bytes(marked_bytes);
+
+    // Now set the top at count to NTAMS.
+    hr->set_top_at_conc_mark_count(limit);
+
+    // Next heap region
+    return false;
+  }
+};
+
+class G1AggregateCountDataTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  BitMap* _cm_card_bm;
+  size_t _max_task_num;
+  int _active_workers;
+
+public:
+  G1AggregateCountDataTask(G1CollectedHeap* g1h,
+                           ConcurrentMark* cm,
+                           BitMap* cm_card_bm,
+                           size_t max_task_num,
+                           int n_workers) :
+    AbstractGangTask("Count Aggregation"),
+    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
+    _max_task_num(max_task_num),
+    _active_workers(n_workers) { }
+
+  void work(uint worker_id) {
+    AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
+
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
+                                            _active_workers,
+                                            HeapRegion::AggregateCountClaimValue);
+    } else {
+      _g1h->heap_region_iterate(&cl);
+    }
+  }
+};
+
+
+void ConcurrentMark::aggregate_count_data() {
+  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                        _g1h->workers()->active_workers() :
+                        1);
+
+  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
+                                           _max_task_num, n_workers);
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+           "sanity check");
+    _g1h->set_par_threads(n_workers);
+    _g1h->workers()->run_task(&g1_par_agg_task);
+    _g1h->set_par_threads(0);
+
+    assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
+           "sanity check");
+    _g1h->reset_heap_region_claim_values();
+  } else {
+    g1_par_agg_task.work(0);
+  }
+}
+
+// Clear the per-worker arrays used to store the per-region counting data
+void ConcurrentMark::clear_all_count_data() {
+  // Clear the global card bitmap - it will be filled during
+  // liveness count aggregation (during remark) and the
+  // final counting task.
+  _card_bm.clear();
+
+  // Clear the global region bitmap - it will be filled as part
+  // of the final counting task.
+  _region_bm.clear();
+
+  size_t max_regions = _g1h->max_regions();
+  assert(_max_task_num != 0, "unitialized");
+
+  for (int i = 0; (size_t) i < _max_task_num; i += 1) {
+    BitMap* task_card_bm = count_card_bitmap_for(i);
+    size_t* marked_bytes_array = count_marked_bytes_array_for(i);
+
+    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
+    assert(marked_bytes_array != NULL, "uninitialized");
+
+    memset(marked_bytes_array, 0, (max_regions * sizeof(size_t)));
+    task_card_bm->clear();
+  }
+}
+
 void ConcurrentMark::print_stats() {
   if (verbose_stats()) {
     gclog_or_tty->print_cr("---------------------------------------------------------------------");
@@ -2919,7 +3671,7 @@
   int              _ms_size;
   int              _ms_ind;
   int              _array_increment;
-  int              _worker_i;
+  uint             _worker_id;
 
   bool push(oop obj, int arr_ind = 0) {
     if (_ms_ind == _ms_size) {
@@ -2969,7 +3721,7 @@
   }
 
 public:
-  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
+  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, uint worker_id) :
     _g1h(G1CollectedHeap::heap()),
     _cm(cm),
     _bm(cm->nextMarkBitMap()),
@@ -2977,7 +3729,7 @@
     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
     _array_increment(MAX2(ms_size/8, 16)),
-    _worker_i(worker_i) { }
+    _worker_id(worker_id) { }
 
   ~CSetMarkOopClosure() {
     FREE_C_HEAP_ARRAY(oop, _ms);
@@ -3022,14 +3774,14 @@
   CMBitMap*          _bitMap;
   ConcurrentMark*    _cm;
   CSetMarkOopClosure _oop_cl;
-  int                _worker_i;
+  uint               _worker_id;
 
 public:
-  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
+  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_id) :
     _g1h(G1CollectedHeap::heap()),
     _bitMap(cm->nextMarkBitMap()),
-    _oop_cl(cm, ms_size, worker_i),
-    _worker_i(worker_i) { }
+    _oop_cl(cm, ms_size, worker_id),
+    _worker_id(worker_id) { }
 
   bool do_bit(size_t offset) {
     // convert offset into a HeapWord*
@@ -3054,17 +3806,17 @@
 class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
   CMBitMap*             _bm;
   CSetMarkBitMapClosure _bit_cl;
-  int                   _worker_i;
+  uint                  _worker_id;
 
   enum SomePrivateConstants {
     MSSize = 1000
   };
 
 public:
-  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
+  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_id) :
     _bm(cm->nextMarkBitMap()),
-    _bit_cl(cm, MSSize, worker_i),
-    _worker_i(worker_i) { }
+    _bit_cl(cm, MSSize, worker_id),
+    _worker_id(worker_id) { }
 
   bool doHeapRegion(HeapRegion* hr) {
     if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
@@ -3083,19 +3835,6 @@
   }
 };
 
-class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
-  jint _claim_value;
-
-public:
-  SetClaimValuesInCSetHRClosure(jint claim_value) :
-    _claim_value(claim_value) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    hr->set_claim_value(_claim_value);
-    return false;
-  }
-};
-
 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
@@ -3107,14 +3846,17 @@
     AbstractGangTask("Complete Mark in CSet"),
     _g1h(g1h), _cm(cm) { }
 
-  void work(int worker_i) {
-    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
-    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
+  void work(uint worker_id) {
+    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_id);
+    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
     _g1h->collection_set_iterate_from(hr, &cmplt);
   }
 };
 
 void ConcurrentMark::complete_marking_in_collection_set() {
+  guarantee(false, "complete_marking_in_collection_set(): "
+                   "don't call this any more");
+
   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
 
   if (!g1h->mark_in_progress()) {
@@ -3123,13 +3865,12 @@
   }
 
   double start = os::elapsedTime();
-  int n_workers = g1h->workers()->total_workers();
-
   G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
 
   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
+    int n_workers = g1h->workers()->active_workers();
     g1h->set_par_threads(n_workers);
     g1h->workers()->run_task(&complete_mark_task);
     g1h->set_par_threads(0);
@@ -3139,9 +3880,8 @@
 
   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
 
-  // Now reset the claim values in the regions in the collection set.
-  SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
-  g1h->collection_set_iterate(&set_cv_cl);
+  // Reset the claim values in the regions in the collection set.
+  g1h->reset_cset_heap_region_claim_values();
 
   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
 
@@ -3164,6 +3904,8 @@
 // newCSet().
 
 void ConcurrentMark::newCSet() {
+  guarantee(false, "newCSet(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) {
     // nothing to do if marking is not in progress
     return;
@@ -3202,6 +3944,8 @@
 }
 
 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+  guarantee(false, "registerCSetRegion(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) return;
 
   HeapWord* region_end = hr->end();
@@ -3213,6 +3957,9 @@
 // Resets the region fields of active CMTasks whose values point
 // into the collection set.
 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
+  guarantee(false, "reset_active_task_region_fields_in_cset(): "
+                   "don't call this any more");
+
   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
   assert(parallel_marking_threads() <= _max_task_num, "sanity");
 
@@ -3236,6 +3983,8 @@
 void ConcurrentMark::abort() {
   // Clear all marks to force marking thread to do nothing
   _nextMarkBitMap->clearAll();
+  // Clear the liveness counting data
+  clear_all_count_data();
   // Empty mark stack
   clear_marking_state();
   for (int i = 0; i < (int)_max_task_num; ++i) {
@@ -3288,31 +4037,23 @@
                          (_init_times.sum() + _remark_times.sum() +
                           _cleanup_times.sum())/1000.0);
   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
-                "(%8.2f s marking, %8.2f s counting).",
+                "(%8.2f s marking).",
                 cmThread()->vtime_accum(),
-                cmThread()->vtime_mark_accum(),
-                cmThread()->vtime_count_accum());
+                cmThread()->vtime_mark_accum());
 }
 
 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
   _parallel_workers->print_worker_threads_on(st);
 }
 
-// Closures
-// XXX: there seems to be a lot of code  duplication here;
-// should refactor and consolidate the shared code.
-
-// This closure is used to mark refs into the CMS generation in
-// the CMS bit map. Called at the first checkpoint.
-
 // We take a break if someone is trying to stop the world.
-bool ConcurrentMark::do_yield_check(int worker_i) {
+bool ConcurrentMark::do_yield_check(uint worker_id) {
   if (should_yield()) {
-    if (worker_i == 0) {
+    if (worker_id == 0) {
       _g1h->g1_policy()->record_concurrent_pause();
     }
     cmThread()->yield();
-    if (worker_i == 0) {
+    if (worker_id == 0) {
       _g1h->g1_policy()->record_concurrent_pause_end();
     }
     return true;
@@ -3923,6 +4664,10 @@
 }
 
 void CMTask::drain_region_stack(BitMapClosure* bc) {
+  assert(_cm->region_stack_empty(), "region stack should be empty");
+  assert(_aborted_region.is_empty(), "aborted region should be empty");
+  return;
+
   if (has_aborted()) return;
 
   assert(_region_finger == NULL,
@@ -4586,6 +5331,8 @@
 
 CMTask::CMTask(int task_id,
                ConcurrentMark* cm,
+               size_t* marked_bytes,
+               BitMap* card_bm,
                CMTaskQueue* task_queue,
                CMTaskQueueSet* task_queues)
   : _g1h(G1CollectedHeap::heap()),
@@ -4595,7 +5342,9 @@
     _task_queue(task_queue),
     _task_queues(task_queues),
     _cm_oop_closure(NULL),
-    _aborted_region(MemRegion()) {
+    _aborted_region(MemRegion()),
+    _marked_bytes_array(marked_bytes),
+    _card_bm(card_bm) {
   guarantee(task_queue != NULL, "invariant");
   guarantee(task_queues != NULL, "invariant");
 
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -84,8 +84,8 @@
   }
 
   // iteration
-  bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); }
-  bool iterate(BitMapClosure* cl, MemRegion mr);
+  inline bool iterate(BitMapClosure* cl, MemRegion mr);
+  inline bool iterate(BitMapClosure* cl);
 
   // Return the address corresponding to the next marked bit at or after
   // "addr", and before "limit", if "limit" is non-NULL.  If there is no
@@ -166,10 +166,10 @@
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   ConcurrentMark* _cm;
-  oop*   _base;      // bottom of stack
-  jint   _index;     // one more than last occupied index
-  jint   _capacity;  // max #elements
-  jint   _oops_do_bound;  // Number of elements to include in next iteration.
+  oop*   _base;        // bottom of stack
+  jint   _index;       // one more than last occupied index
+  jint   _capacity;    // max #elements
+  jint   _saved_index; // value of _index saved at start of GC
   NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
 
   bool   _overflow;
@@ -247,16 +247,12 @@
 
   void setEmpty()   { _index = 0; clear_overflow(); }
 
-  // Record the current size; a subsequent "oops_do" will iterate only over
-  // indices valid at the time of this call.
-  void set_oops_do_bound(jint bound = -1) {
-    if (bound == -1) {
-      _oops_do_bound = _index;
-    } else {
-      _oops_do_bound = bound;
-    }
-  }
-  jint oops_do_bound() { return _oops_do_bound; }
+  // Record the current index.
+  void note_start_of_gc();
+
+  // Make sure that we have not added any entries to the stack during GC.
+  void note_end_of_gc();
+
   // iterate over the oops in the mark stack, up to the bound recorded via
   // the call above.
   void oops_do(OopClosure* f);
@@ -353,10 +349,62 @@
   high_verbose       // per object verbose
 } CMVerboseLevel;
 
+class YoungList;
+
+// Root Regions are regions that are not empty at the beginning of a
+// marking cycle and which we might collect during an evacuation pause
+// while the cycle is active. Given that, during evacuation pauses, we
+// do not copy objects that are explicitly marked, what we have to do
+// for the root regions is to scan them and mark all objects reachable
+// from them. According to the SATB assumptions, we only need to visit
+// each object once during marking. So, as long as we finish this scan
+// before the next evacuation pause, we can copy the objects from the
+// root regions without having to mark them or do anything else to them.
+//
+// Currently, we only support root region scanning once (at the start
+// of the marking cycle) and the root regions are all the survivor
+// regions populated during the initial-mark pause.
+class CMRootRegions VALUE_OBJ_CLASS_SPEC {
+private:
+  YoungList*           _young_list;
+  ConcurrentMark*      _cm;
+
+  volatile bool        _scan_in_progress;
+  volatile bool        _should_abort;
+  HeapRegion* volatile _next_survivor;
+
+public:
+  CMRootRegions();
+  // We actually do most of the initialization in this method.
+  void init(G1CollectedHeap* g1h, ConcurrentMark* cm);
+
+  // Reset the claiming / scanning of the root regions.
+  void prepare_for_scan();
+
+  // Forces claim_next() to return NULL so that the iteration aborts early.
+  void abort() { _should_abort = true; }
+
+  // Return true if the CM threads are actively scanning root regions,
+  // false otherwise.
+  bool scan_in_progress() { return _scan_in_progress; }
+
+  // Claim the next root region to scan atomically, or return NULL if
+  // all have been claimed.
+  HeapRegion* claim_next();
+
+  // Flag that we're done with root region scanning and notify anyone
+  // who's waiting on it. If the scan was not aborted (see abort()),
+  // assume that all regions have been claimed.
+  void scan_finished();
+
+  // If CM threads are still scanning root regions, wait until they
+  // are done. Return true if we had to wait, false otherwise.
+  bool wait_until_scan_finished();
+};
 
 class ConcurrentMarkThread;
 
-class ConcurrentMark: public CHeapObj {
+class ConcurrentMark : public CHeapObj {
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
@@ -374,9 +422,9 @@
 protected:
   ConcurrentMarkThread* _cmThread;   // the thread doing the work
   G1CollectedHeap*      _g1h;        // the heap.
-  size_t                _parallel_marking_threads; // the number of marking
+  uint                  _parallel_marking_threads; // the number of marking
                                                    // threads we're use
-  size_t                _max_parallel_marking_threads; // max number of marking
+  uint                  _max_parallel_marking_threads; // max number of marking
                                                    // threads we'll ever use
   double                _sleep_factor; // how much we have to sleep, with
                                        // respect to the work we just did, to
@@ -390,7 +438,7 @@
 
   FreeRegionList        _cleanup_list;
 
-  // CMS marking support structures
+  // Concurrent marking support structures
   CMBitMap                _markBitMap1;
   CMBitMap                _markBitMap2;
   CMBitMapRO*             _prevMarkBitMap; // completed mark bitmap
@@ -404,6 +452,9 @@
   HeapWord*               _heap_start;
   HeapWord*               _heap_end;
 
+  // Root region tracking and claiming.
+  CMRootRegions           _root_regions;
+
   // For gray objects
   CMMarkStack             _markStack; // Grey objects behind global finger.
   CMRegionStack           _regionStack; // Grey regions behind global finger.
@@ -412,8 +463,8 @@
                                     // last claimed region
 
   // marking tasks
-  size_t                  _max_task_num; // maximum task number
-  size_t                  _active_tasks; // task num currently active
+  uint                    _max_task_num; // maximum task number
+  uint                    _active_tasks; // task num currently active
   CMTask**                _tasks;        // task queue array (max_task_num len)
   CMTaskQueueSet*         _task_queues;  // task queue set
   ParallelTaskTerminator  _terminator;   // for termination
@@ -430,7 +481,6 @@
   WorkGangBarrierSync     _first_overflow_barrier_sync;
   WorkGangBarrierSync     _second_overflow_barrier_sync;
 
-
   // this is set by any task, when an overflow on the global data
   // structures is detected.
   volatile bool           _has_overflown;
@@ -492,7 +542,7 @@
 
   // It should be called to indicate which phase we're in (concurrent
   // mark or remark) and how many threads are currently active.
-  void set_phase(size_t active_tasks, bool concurrent);
+  void set_phase(uint active_tasks, bool concurrent);
   // We do this after we're done with marking so that the marking data
   // structures are initialised to a sensible and predictable state.
   void set_non_marking_state();
@@ -505,8 +555,8 @@
   }
 
   // accessor methods
-  size_t parallel_marking_threads() { return _parallel_marking_threads; }
-  size_t max_parallel_marking_threads() { return _max_parallel_marking_threads;}
+  uint parallel_marking_threads() { return _parallel_marking_threads; }
+  uint max_parallel_marking_threads() { return _max_parallel_marking_threads;}
   double sleep_factor()             { return _sleep_factor; }
   double marking_task_overhead()    { return _marking_task_overhead;}
   double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
@@ -514,7 +564,7 @@
 
   HeapWord*               finger()        { return _finger;   }
   bool                    concurrent()    { return _concurrent; }
-  size_t                  active_tasks()  { return _active_tasks; }
+  uint                    active_tasks()  { return _active_tasks; }
   ParallelTaskTerminator* terminator()    { return &_terminator; }
 
   // It claims the next available region to be scanned by a marking
@@ -558,9 +608,9 @@
   bool has_overflown()           { return _has_overflown; }
   void set_has_overflown()       { _has_overflown = true; }
   void clear_has_overflown()     { _has_overflown = false; }
+  bool restart_for_overflow()    { return _restart_for_overflow; }
 
   bool has_aborted()             { return _has_aborted; }
-  bool restart_for_overflow()    { return _restart_for_overflow; }
 
   // Methods to enter the two overflow sync barriers
   void enter_first_sync_barrier(int task_num);
@@ -582,6 +632,27 @@
     }
   }
 
+  // Live Data Counting data structures...
+  // These data structures are initialized at the start of
+  // marking. They are written to while marking is active.
+  // They are aggregated during remark; the aggregated values
+  // are then used to populate the _region_bm, _card_bm, and
+  // the total live bytes, which are then subsequently updated
+  // during cleanup.
+
+  // An array of bitmaps (one bit map per task). Each bitmap
+  // is used to record the cards spanned by the live objects
+  // marked by that task/worker.
+  BitMap*  _count_card_bitmaps;
+
+  // Used to record the number of marked live bytes
+  // (for each region, by worker thread).
+  size_t** _count_marked_bytes;
+
+  // Card index of the bottom of the G1 heap. Used for biasing indices into
+  // the card bitmaps.
+  intptr_t _heap_bottom_card_num;
+
 public:
   // Manipulation of the global mark stack.
   // Notice that the first mark_stack_push is CAS-based, whereas the
@@ -675,6 +746,8 @@
   // Returns true if there are any aborted memory regions.
   bool has_aborted_regions();
 
+  CMRootRegions* root_regions() { return &_root_regions; }
+
   bool concurrent_marking_in_progress() {
     return _concurrent_marking_in_progress;
   }
@@ -707,6 +780,7 @@
 
   ConcurrentMark(ReservedSpace rs, int max_regions);
   ~ConcurrentMark();
+
   ConcurrentMarkThread* cmThread() { return _cmThread; }
 
   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
@@ -715,19 +789,27 @@
   // Returns the number of GC threads to be used in a concurrent
   // phase based on the number of GC threads being used in a STW
   // phase.
-  size_t scale_parallel_threads(size_t n_par_threads);
+  uint scale_parallel_threads(uint n_par_threads);
 
   // Calculates the number of GC threads to be used in a concurrent phase.
-  int calc_parallel_marking_threads();
+  uint calc_parallel_marking_threads();
 
   // The following three are interaction between CM and
   // G1CollectedHeap
 
   // This notifies CM that a root during initial-mark needs to be
-  // grayed and it's MT-safe. Currently, we just mark it. But, in the
-  // future, we can experiment with pushing it on the stack and we can
-  // do this without changing G1CollectedHeap.
-  void grayRoot(oop p);
+  // grayed. It is MT-safe. word_size is the size of the object in
+  // words. It is passed explicitly as sometimes we cannot calculate
+  // it from the given object because it might be in an inconsistent
+  // state (e.g., in to-space and being copied). So the caller is
+  // responsible for dealing with this issue (e.g., get the size from
+  // the from-space image when the to-space image might be
+  // inconsistent) and always passing the size. hr is the region that
+  // contains the object and it's passed optionally from callers who
+  // might already have it (no point in recalculating it).
+  inline void grayRoot(oop obj, size_t word_size,
+                       uint worker_id, HeapRegion* hr = NULL);
+
   // It's used during evacuation pauses to gray a region, if
   // necessary, and it's MT-safe. It assumes that the caller has
   // marked any objects on that region. If _should_gray_objects is
@@ -735,6 +817,7 @@
   // pushed on the region stack, if it is located below the global
   // finger, otherwise we do nothing.
   void grayRegionIfNecessary(MemRegion mr);
+
   // It's used during evacuation pauses to mark and, if necessary,
   // gray a single object and it's MT-safe. It assumes the caller did
   // not mark the object. If _should_gray_objects is true and we're
@@ -776,6 +859,13 @@
   void checkpointRootsInitialPre();
   void checkpointRootsInitialPost();
 
+  // Scan all the root regions and mark everything reachable from
+  // them.
+  void scanRootRegions();
+
+  // Scan a single root region and mark everything reachable from it.
+  void scanRootRegion(HeapRegion* hr, uint worker_id);
+
   // Do concurrent phase of marking, to a tentative transitive closure.
   void markFromRoots();
 
@@ -785,30 +875,44 @@
 
   void checkpointRootsFinal(bool clear_all_soft_refs);
   void checkpointRootsFinalWork();
-  void calcDesiredRegions();
   void cleanup();
   void completeCleanup();
 
   // Mark in the previous bitmap.  NB: this is usually read-only, so use
   // this carefully!
-  void markPrev(oop p);
-  void clear(oop p);
-  // Clears marks for all objects in the given range, for both prev and
-  // next bitmaps.  NB: the previous bitmap is usually read-only, so use
-  // this carefully!
-  void clearRangeBothMaps(MemRegion mr);
+  inline void markPrev(oop p);
+
+  // Clears marks for all objects in the given range, for the prev,
+  // next, or both bitmaps.  NB: the previous bitmap is usually
+  // read-only, so use this carefully!
+  void clearRangePrevBitmap(MemRegion mr);
+  void clearRangeNextBitmap(MemRegion mr);
+  void clearRangeBothBitmaps(MemRegion mr);
 
-  // Record the current top of the mark and region stacks; a
-  // subsequent oops_do() on the mark stack and
-  // invalidate_entries_into_cset() on the region stack will iterate
-  // only over indices valid at the time of this call.
-  void set_oops_do_bound() {
-    _markStack.set_oops_do_bound();
-    _regionStack.set_oops_do_bound();
+  // Notify data structures that a GC has started.
+  void note_start_of_gc() {
+    _markStack.note_start_of_gc();
   }
+
+  // Notify data structures that a GC is finished.
+  void note_end_of_gc() {
+    _markStack.note_end_of_gc();
+  }
+
   // Iterate over the oops in the mark stack and all local queues. It
   // also calls invalidate_entries_into_cset() on the region stack.
   void oops_do(OopClosure* f);
+
+  // Verify that there are no CSet oops on the stacks (taskqueues /
+  // global mark stack), enqueued SATB buffers, per-thread SATB
+  // buffers, and fingers (global / per-task). The boolean parameters
+  // decide which of the above data structures to verify. If marking
+  // is not in progress, it's a no-op.
+  void verify_no_cset_oops(bool verify_stacks,
+                           bool verify_enqueued_buffers,
+                           bool verify_thread_buffers,
+                           bool verify_fingers) PRODUCT_RETURN;
+
   // It is called at the end of an evacuation pause during marking so
   // that CM is notified of where the new end of the heap is. It
   // doesn't do anything if concurrent_marking_in_progress() is false,
@@ -873,7 +977,7 @@
     return _prevMarkBitMap->isMarked(addr);
   }
 
-  inline bool do_yield_check(int worker_i = 0);
+  inline bool do_yield_check(uint worker_i = 0);
   inline bool should_yield();
 
   // Called to abort the marking cycle after a Full GC takes palce.
@@ -901,6 +1005,114 @@
   bool verbose_high() {
     return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
   }
+
+  // Counting data structure accessors
+
+  // Returns the card number of the bottom of the G1 heap.
+  // Used in biasing indices into accounting card bitmaps.
+  intptr_t heap_bottom_card_num() const {
+    return _heap_bottom_card_num;
+  }
+
+  // Returns the card bitmap for the given task/worker id (< _max_task_num).
+  BitMap* count_card_bitmap_for(uint worker_id) {
+    assert(worker_id < _max_task_num, "oob");
+    assert(_count_card_bitmaps != NULL, "uninitialized");
+    BitMap* task_card_bm = &_count_card_bitmaps[worker_id];
+    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
+    return task_card_bm;
+  }
+
+  // Returns the array containing the marked bytes for each region,
+  // for the given worker or task id (which must be < _max_task_num).
+  size_t* count_marked_bytes_array_for(uint worker_id) {
+    assert(worker_id < _max_task_num, "oob");
+    assert(_count_marked_bytes != NULL, "uninitialized");
+    size_t* marked_bytes_array = _count_marked_bytes[worker_id];
+    assert(marked_bytes_array != NULL, "uninitialized");
+    return marked_bytes_array;
+  }
+
+  // Returns the index in the liveness accounting card table bitmap
+  // for the given address
+  inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr);
+
+  // Counts the size of the given memory region in the given
+  // marked_bytes array slot for the given HeapRegion.
+  // Sets the bits in the given card bitmap that are associated with the
+  // cards that are spanned by the memory region.
+  inline void count_region(MemRegion mr, HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm);
+
+  // Counts the given memory region in the task/worker counting
+  // data structures for the given worker id.
+  inline void count_region(MemRegion mr, HeapRegion* hr, uint worker_id);
+
+  // Counts the given memory region in the task/worker counting
+  // data structures for the given worker id.
+  inline void count_region(MemRegion mr, uint worker_id);
+
+  // Counts the given object in the given task/worker counting
+  // data structures.
+  inline void count_object(oop obj, HeapRegion* hr,
+                           size_t* marked_bytes_array,
+                           BitMap* task_card_bm);
+
+  // Counts the given object in the task/worker counting data
+  // structures for the given worker id.
+  inline void count_object(oop obj, HeapRegion* hr, uint worker_id);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the given task/worker counting structures.
+  inline bool par_mark_and_count(oop obj, HeapRegion* hr,
+                                 size_t* marked_bytes_array,
+                                 BitMap* task_card_bm);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the task/worker counting structures for the
+  // given worker id.
+  inline bool par_mark_and_count(oop obj, size_t word_size,
+                                 HeapRegion* hr, uint worker_id);
+
+  // Attempts to mark the given object and, if successful, counts
+  // the object in the task/worker counting structures for the
+  // given worker id.
+  inline bool par_mark_and_count(oop obj, HeapRegion* hr, uint worker_id);
+
+  // Similar to the above routine but we don't know the heap region that
+  // contains the object to be marked/counted, which this routine looks up.
+  inline bool par_mark_and_count(oop obj, uint worker_id);
+
+  // Similar to the above routine but there are times when we cannot
+  // safely calculate the size of obj due to races and we, therefore,
+  // pass the size in as a parameter. It is the caller's responsibility
+  // to ensure that the size passed in for obj is valid.
+  inline bool par_mark_and_count(oop obj, size_t word_size, uint worker_id);
+
+  // Unconditionally mark the given object, and unconditionally count
+  // the object in the counting structures for worker id 0.
+  // Should *not* be called from parallel code.
+  inline bool mark_and_count(oop obj, HeapRegion* hr);
+
+  // Similar to the above routine but we don't know the heap region that
+  // contains the object to be marked/counted, which this routine looks up.
+  // Should *not* be called from parallel code.
+  inline bool mark_and_count(oop obj);
+
+protected:
+  // Clear all the per-task bitmaps and arrays used to store the
+  // counting data.
+  void clear_all_count_data();
+
+  // Aggregates the counting data for each worker/task
+  // that was constructed while marking. Also sets
+  // the amount of marked bytes for each region and
+  // the top at concurrent mark count.
+  void aggregate_count_data();
+
+  // Verification routine
+  void verify_count_data();
 };
 
 // A class representing a marking task.
@@ -1019,6 +1231,12 @@
 
   TruncatedSeq                _marking_step_diffs_ms;
 
+  // Counting data structures. Embedding the task's marked_bytes_array
+  // and card bitmap into the actual task saves having to go through
+  // the ConcurrentMark object.
+  size_t*                     _marked_bytes_array;
+  BitMap*                     _card_bm;
+
   // LOTS of statistics related with this task
 #if _MARKING_STATS_
   NumberSeq                   _all_clock_intervals_ms;
@@ -1166,6 +1384,7 @@
   // It keeps picking SATB buffers and processing them until no SATB
   // buffers are available.
   void drain_satb_buffers();
+
   // It keeps popping regions from the region stack and processing
   // them until the region stack is empty.
   void drain_region_stack(BitMapClosure* closure);
@@ -1183,6 +1402,7 @@
   }
 
   CMTask(int task_num, ConcurrentMark *cm,
+         size_t* marked_bytes, BitMap* card_bm,
          CMTaskQueue* task_queue, CMTaskQueueSet* task_queues);
 
   // it prints statistics associated with this task
--- a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,214 @@
 #include "gc_implementation/g1/concurrentMark.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 
+// Returns the index in the liveness accounting card bitmap
+// for the given address
+inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
+  // Below, the term "card num" means the result of shifting an address
+  // by the card shift -- address 0 corresponds to card number 0.  One
+  // must subtract the card num of the bottom of the heap to obtain a
+  // card table index.
+
+  intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
+  return card_num - heap_bottom_card_num();
+}
+
+// Counts the given memory region in the given task/worker
+// counting data structures.
+inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
+                                         size_t* marked_bytes_array,
+                                         BitMap* task_card_bm) {
+  G1CollectedHeap* g1h = _g1h;
+  HeapWord* start = mr.start();
+  HeapWord* last = mr.last();
+  size_t region_size_bytes = mr.byte_size();
+  size_t index = hr->hrs_index();
+
+  assert(!hr->continuesHumongous(), "should not be HC region");
+  assert(hr == g1h->heap_region_containing(start), "sanity");
+  assert(hr == g1h->heap_region_containing(last), "sanity");
+  assert(marked_bytes_array != NULL, "pre-condition");
+  assert(task_card_bm != NULL, "pre-condition");
+
+  // Add to the task local marked bytes for this region.
+  marked_bytes_array[index] += region_size_bytes;
+
+  BitMap::idx_t start_idx = card_bitmap_index_for(start);
+  BitMap::idx_t last_idx = card_bitmap_index_for(last);
+  assert(last_idx < task_card_bm->size(), "sanity");
+
+  // The card bitmap is task/worker specific => no need to use 'par' routines.
+  // Set bits in the inclusive bit range [start_idx, last_idx]; the bound
+  // on last_idx is checked above so that both paths below are covered.
+  //
+  // For small ranges use a simple loop; otherwise use set_range
+  // The range are the cards that are spanned by the object/region
+  // so 8 cards will allow objects/regions up to 4K to be handled
+  // using the loop.
+  if ((last_idx - start_idx) <= 8) {
+    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
+      task_card_bm->set_bit(i);
+    }
+  } else {
+    // Note: BitMap::set_range() is exclusive.
+    task_card_bm->set_range(start_idx, last_idx+1);
+  }
+}
+
+// Counts the given memory region in the task/worker counting
+// data structures for the given worker id.
+inline void ConcurrentMark::count_region(MemRegion mr,
+                                         HeapRegion* hr,
+                                         uint worker_id) {
+  size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
+  BitMap* task_card_bm = count_card_bitmap_for(worker_id);
+  count_region(mr, hr, marked_bytes_array, task_card_bm);
+}
+
+// Counts the given memory region, which may be a single object, in the
+// task/worker counting data structures for the given worker id.
+inline void ConcurrentMark::count_region(MemRegion mr, uint worker_id) {
+  HeapWord* addr = mr.start();
+  HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
+  count_region(mr, hr, worker_id);
+}
+
+// Counts the given object in the given task/worker counting data structures.
+inline void ConcurrentMark::count_object(oop obj,
+                                         HeapRegion* hr,
+                                         size_t* marked_bytes_array,
+                                         BitMap* task_card_bm) {
+  MemRegion mr((HeapWord*)obj, obj->size());
+  count_region(mr, hr, marked_bytes_array, task_card_bm);
+}
+
+// Counts the given object in the task/worker counting data
+// structures for the given worker id.
+inline void ConcurrentMark::count_object(oop obj,
+                                         HeapRegion* hr,
+                                         uint worker_id) {
+  // Fetch this worker's counting structures and delegate to the overload.
+  size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
+  BitMap* task_card_bm = count_card_bitmap_for(worker_id);
+  count_object(obj, hr, marked_bytes_array, task_card_bm);
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the given task/worker counting structures.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               HeapRegion* hr,
+                                               size_t* marked_bytes_array,
+                                               BitMap* task_card_bm) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    // Update the task specific count data for the object.
+    count_object(obj, hr, marked_bytes_array, task_card_bm);
+    return true;
+  }
+  return false;
+}
+
+// Attempts to mark the given object and, if successful, counts
+// word_size words starting at obj (obj->size() is not consulted) in
+// the task/worker counting structures for the given worker id.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               size_t word_size,
+                                               HeapRegion* hr,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    MemRegion mr(addr, word_size);
+    count_region(mr, hr, worker_id);
+    return true;
+  }
+  return false;
+}
+
+// Attempts to mark the given object and, if successful, counts
+// the object in the task/worker counting structures for the
+// given worker id.
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               HeapRegion* hr,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    // Update the task specific count data for the object.
+    count_object(obj, hr, worker_id);
+    return true;
+  }
+  return false;
+}
+
+// As above - but we don't know the heap region containing the
+// object, so we look it up via heap_region_containing_raw().
+inline bool ConcurrentMark::par_mark_and_count(oop obj, uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
+  return par_mark_and_count(obj, hr, worker_id);
+}
+
+// Similar to the above routine but we already know the size, in words, of
+// the object that we wish to mark/count
+inline bool ConcurrentMark::par_mark_and_count(oop obj,
+                                               size_t word_size,
+                                               uint worker_id) {
+  HeapWord* addr = (HeapWord*)obj;
+  if (_nextMarkBitMap->parMark(addr)) {
+    // Update the task specific count data for the object.
+    MemRegion mr(addr, word_size);
+    count_region(mr, worker_id);
+    return true;
+  }
+  return false;
+}
+
+// Unconditionally mark the given object, and unconditionally count
+// the object in the counting structures for worker id 0.
+// Should *not* be called from parallel code. Always returns true.
+inline bool ConcurrentMark::mark_and_count(oop obj, HeapRegion* hr) {
+  HeapWord* addr = (HeapWord*)obj;
+  _nextMarkBitMap->mark(addr);
+  // Update the task specific count data for the object.
+  count_object(obj, hr, 0 /* worker_id */);
+  return true;
+}
+
+// As above - but we don't have the heap region containing the
+// object, so we look it up via heap_region_containing_raw().
+inline bool ConcurrentMark::mark_and_count(oop obj) {
+  HeapWord* addr = (HeapWord*)obj;
+  HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
+  return mark_and_count(obj, hr);
+}
+
+inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
+  HeapWord* start_addr = MAX2(startWord(), mr.start());
+  HeapWord* end_addr = MIN2(endWord(), mr.end());
+
+  if (end_addr > start_addr) {
+    // Right-open interval [start-offset, end-offset).
+    BitMap::idx_t start_offset = heapWordToOffset(start_addr);
+    BitMap::idx_t end_offset = heapWordToOffset(end_addr);
+
+    start_offset = _bm.get_next_one_offset(start_offset, end_offset);
+    while (start_offset < end_offset) {
+      HeapWord* obj_addr = offsetToHeapWord(start_offset);
+      oop obj = (oop) obj_addr;
+      if (!cl->do_bit(start_offset)) {
+        return false;
+      }
+      HeapWord* next_addr = MIN2(obj_addr + obj->size(), end_addr);
+      BitMap::idx_t next_offset = heapWordToOffset(next_addr);
+      start_offset = _bm.get_next_one_offset(next_offset, end_offset);
+    }
+  }
+  return true;
+}
+
+inline bool CMBitMapRO::iterate(BitMapClosure* cl) {
+  MemRegion mr(startWord(), sizeInWords());
+  return iterate(cl, mr);
+}
+
 inline void CMTask::push(oop obj) {
   HeapWord* objAddr = (HeapWord*) obj;
   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
@@ -84,7 +292,7 @@
 
   HeapWord* objAddr = (HeapWord*) obj;
   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
- if (_g1h->is_in_g1_reserved(objAddr)) {
+  if (_g1h->is_in_g1_reserved(objAddr)) {
     assert(obj != NULL, "null check is implicit");
     if (!_nextMarkBitMap->isMarked(objAddr)) {
       // Only get the containing region if the object is not marked on the
@@ -98,9 +306,9 @@
         }
 
         // we need to mark it first
-        if (_nextMarkBitMap->parMark(objAddr)) {
+        if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
           // No OrderAccess:store_load() is needed. It is implicit in the
-          // CAS done in parMark(objAddr) above
+          // CAS done in CMBitMap::parMark() call in the routine above.
           HeapWord* global_finger = _cm->finger();
 
 #if _CHECK_BOTH_FINGERS_
@@ -153,4 +361,42 @@
   }
 }
 
+inline void ConcurrentMark::markPrev(oop p) {
+  assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::grayRoot(oop obj, size_t word_size,
+                                     uint worker_id, HeapRegion* hr) {
+  assert(obj != NULL, "pre-condition");
+  HeapWord* addr = (HeapWord*) obj;
+  if (hr == NULL) {
+    hr = _g1h->heap_region_containing_raw(addr);
+  } else {
+    assert(hr->is_in(addr), "pre-condition");
+  }
+  assert(hr != NULL, "sanity");
+  // Given that we're looking for a region that contains an object
+  // header it's impossible to get back a HC region.
+  assert(!hr->continuesHumongous(), "sanity");
+
+  // We cannot assert that word_size == obj->size() given that obj
+  // might not be in a consistent state (another thread might be in
+  // the process of copying it). So the best thing we can do is to
+  // assert that word_size is under an upper bound which is its
+  // containing region's capacity.
+  assert(word_size * HeapWordSize <= hr->capacity(),
+         err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
+                 word_size * HeapWordSize, hr->capacity(),
+                 HR_FORMAT_PARAMS(hr)));
+
+  if (addr < hr->next_top_at_mark_start()) {
+    if (!_nextMarkBitMap->isMarked(addr)) {
+      par_mark_and_count(obj, word_size, hr, worker_id);
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,9 +44,7 @@
   _started(false),
   _in_progress(false),
   _vtime_accum(0.0),
-  _vtime_mark_accum(0.0),
-  _vtime_count_accum(0.0)
-{
+  _vtime_mark_accum(0.0) {
   create_and_start();
 }
 
@@ -94,9 +92,36 @@
       ResourceMark rm;
       HandleMark   hm;
       double cycle_start = os::elapsedVTime();
-      double mark_start_sec = os::elapsedTime();
       char verbose_str[128];
 
+      // We have to ensure that we finish scanning the root regions
+      // before the next GC takes place. To ensure this we have to
+      // make sure that we do not join the STS until the root regions
+      // have been scanned. If we did then it's possible that a
+      // subsequent GC could block us from joining the STS and proceed
+      // without the root regions having been scanned, which would be a
+      // correctness issue.
+
+      double scan_start = os::elapsedTime();
+      if (!cm()->has_aborted()) {
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
+        }
+
+        _cm->scanRootRegions();
+
+        double scan_end = os::elapsedTime();
+        if (PrintGC) {
+          gclog_or_tty->date_stamp(PrintGCDateStamps);
+          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf]",
+                                 scan_end - scan_start);
+        }
+      }
+
+      double mark_start_sec = os::elapsedTime();
       if (PrintGC) {
         gclog_or_tty->date_stamp(PrintGCDateStamps);
         gclog_or_tty->stamp(PrintGCTimeStamps);
@@ -148,36 +173,12 @@
         }
       } while (cm()->restart_for_overflow());
 
-      double counting_start_time = os::elapsedVTime();
-      if (!cm()->has_aborted()) {
-        double count_start_sec = os::elapsedTime();
-        if (PrintGC) {
-          gclog_or_tty->date_stamp(PrintGCDateStamps);
-          gclog_or_tty->stamp(PrintGCTimeStamps);
-          gclog_or_tty->print_cr("[GC concurrent-count-start]");
-        }
-
-        _sts.join();
-        _cm->calcDesiredRegions();
-        _sts.leave();
-
-        if (!cm()->has_aborted()) {
-          double count_end_sec = os::elapsedTime();
-          if (PrintGC) {
-            gclog_or_tty->date_stamp(PrintGCDateStamps);
-            gclog_or_tty->stamp(PrintGCTimeStamps);
-            gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]",
-                                   count_end_sec - count_start_sec);
-          }
-        }
-      }
-
       double end_time = os::elapsedVTime();
-      _vtime_count_accum += (end_time - counting_start_time);
       // Update the total virtual time before doing this, since it will try
       // to measure it to get the vtime for this marking.  We purposely
       // neglect the presumably-short "completeCleanup" phase here.
       _vtime_accum = (end_time - _vtime_start);
+
       if (!cm()->has_aborted()) {
         if (g1_policy->adaptive_young_list_length()) {
           double now = os::elapsedTime();
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,6 @@
   double _vtime_accum;  // Accumulated virtual time.
 
   double _vtime_mark_accum;
-  double _vtime_count_accum;
 
  public:
   virtual void run();
@@ -69,8 +68,6 @@
   double vtime_accum();
   // Marking virtual time so far
   double vtime_mark_accum();
-  // Counting virtual time so far.
-  double vtime_count_accum() { return _vtime_count_accum; }
 
   ConcurrentMark* cm()     { return _cm; }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,11 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
+#include "gc_implementation/g1/g1EvacFailure.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
@@ -172,13 +174,10 @@
   }
 };
 
-YoungList::YoungList(G1CollectedHeap* g1h)
-  : _g1h(g1h), _head(NULL),
-    _length(0),
-    _last_sampled_rs_lengths(0),
-    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0)
-{
-  guarantee( check_list_empty(false), "just making sure..." );
+YoungList::YoungList(G1CollectedHeap* g1h) :
+    _g1h(g1h), _head(NULL), _length(0), _last_sampled_rs_lengths(0),
+    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0) {
+  guarantee(check_list_empty(false), "just making sure...");
 }
 
 void YoungList::push_region(HeapRegion *hr) {
@@ -591,17 +590,29 @@
     }
     res = new_region_try_secondary_free_list();
   }
-  if (res == NULL && do_expand) {
+  if (res == NULL && do_expand && _expand_heap_after_alloc_failure) {
+    // Currently, only attempts to allocate GC alloc regions set
+    // do_expand to true. So, we should only reach here during a
+    // safepoint. If this assumption changes we might have to
+    // reconsider the use of _expand_heap_after_alloc_failure.
+    assert(SafepointSynchronize::is_at_safepoint(), "invariant");
+
     ergo_verbose1(ErgoHeapSizing,
                   "attempt heap expansion",
                   ergo_format_reason("region allocation request failed")
                   ergo_format_byte("allocation request"),
                   word_size * HeapWordSize);
     if (expand(word_size * HeapWordSize)) {
-      // Even though the heap was expanded, it might not have reached
-      // the desired size. So, we cannot assume that the allocation
-      // will succeed.
+      // Given that expand() succeeded in expanding the heap, and we
+      // always expand the heap by an amount aligned to the heap
+      // region size, the free list should in theory not be empty. So
+      // it would probably be OK to use remove_head(). But the extra
+      // check for NULL is unlikely to be a performance issue here (we
+      // just expanded the heap!) so let's just be conservative and
+      // use remove_head_or_null().
       res = _free_list.remove_head_or_null();
+    } else {
+      _expand_heap_after_alloc_failure = false;
     }
   }
   return res;
@@ -947,7 +958,7 @@
         should_try_gc = false;
       } else {
         // Read the GC count while still holding the Heap_lock.
-        gc_count_before = SharedHeap::heap()->total_collections();
+        gc_count_before = total_collections();
         should_try_gc = true;
       }
     }
@@ -965,7 +976,7 @@
         // failed to allocate. No point in trying to allocate
         // further. We'll just return NULL.
         MutexLockerEx x(Heap_lock);
-        *gc_count_before_ret = SharedHeap::heap()->total_collections();
+        *gc_count_before_ret = total_collections();
         return NULL;
       }
     } else {
@@ -1015,6 +1026,16 @@
   assert(isHumongous(word_size), "attempt_allocation_humongous() "
          "should only be called for humongous allocations");
 
+  // Humongous objects can exhaust the heap quickly, so we should check if we
+  // need to start a marking cycle at each humongous object allocation. We do
+  // the check before we do the actual allocation. The reason for doing it
+  // before the allocation is that we avoid having to keep track of the newly
+  // allocated memory while we do a GC.
+  if (g1_policy()->need_to_start_conc_mark("concurrent humongous allocation",
+                                           word_size)) {
+    collect(GCCause::_g1_humongous_allocation);
+  }
+
   // We will loop until a) we manage to successfully perform the
   // allocation or b) we successfully schedule a collection which
   // fails to perform the allocation. b) is the only case when we'll
@@ -1039,7 +1060,7 @@
         should_try_gc = false;
       } else {
         // Read the GC count while still holding the Heap_lock.
-        gc_count_before = SharedHeap::heap()->total_collections();
+        gc_count_before = total_collections();
         should_try_gc = true;
       }
     }
@@ -1061,7 +1082,7 @@
         // failed to allocate. No point in trying to allocate
         // further. We'll just return NULL.
         MutexLockerEx x(Heap_lock);
-        *gc_count_before_ret = SharedHeap::heap()->total_collections();
+        *gc_count_before_ret = total_collections();
         return NULL;
       }
     } else {
@@ -1097,7 +1118,11 @@
     return _mutator_alloc_region.attempt_allocation_locked(word_size,
                                                       false /* bot_updates */);
   } else {
-    return humongous_obj_allocate(word_size);
+    HeapWord* result = humongous_obj_allocate(word_size);
+    if (result != NULL && g1_policy()->need_to_start_conc_mark("STW humongous allocation")) {
+      g1_policy()->set_initiate_conc_mark_if_possible();
+    }
+    return result;
   }
 
   ShouldNotReachHere();
@@ -1165,9 +1190,9 @@
       _g1(g1)
   { }
 
-  void work(int i) {
-    RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
-    _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
+  void work(uint worker_id) {
+    RebuildRSOutOfRegionClosure rebuild_rs(_g1, worker_id);
+    _g1->heap_region_par_iterate_chunked(&rebuild_rs, worker_id,
                                           _g1->workers()->active_workers(),
                                          HeapRegion::RebuildRSClaimValue);
   }
@@ -1214,9 +1239,7 @@
   SvcGCMarker sgcm(SvcGCMarker::FULL);
   ResourceMark rm;
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-  }
+  print_heap_before_gc();
 
   HRSPhaseSetter x(HRSPhaseFullGC);
   verify_region_sets_optional();
@@ -1243,7 +1266,18 @@
     double start = os::elapsedTime();
     g1_policy()->record_full_collection_start();
 
+    // Note: When we have a more flexible GC logging framework that
+    // allows us to add optional attributes to a GC log record we
+    // could consider timing and reporting how long we wait in the
+    // following two methods.
     wait_while_free_regions_coming();
+    // If we start the compaction before the CM threads finish
+    // scanning the root regions we might trip them over as we'll
+    // be moving objects / updating references. So let's wait until
+    // they are done. By telling them to abort, they should complete
+    // early.
+    _cm->root_regions()->abort();
+    _cm->root_regions()->wait_until_scan_finished();
     append_secondary_free_list_if_not_empty_with_lock();
 
     gc_prologue(true);
@@ -1272,7 +1306,8 @@
     ref_processor_cm()->verify_no_references_recorded();
 
     // Abandon current iterations of concurrent marking and concurrent
-    // refinement, if any are in progress.
+    // refinement, if any are in progress. We have to do this before
+    // wait_until_scan_finished() below.
     concurrent_mark()->abort();
 
     // Make sure we'll choose a new allocation region afterwards.
@@ -1294,7 +1329,7 @@
     g1_policy()->stop_incremental_cset_building();
 
     tear_down_region_sets(false /* free_list_only */);
-    g1_policy()->set_full_young_gcs(true);
+    g1_policy()->set_gcs_are_young(true);
 
     // See the comments in g1CollectedHeap.hpp and
     // G1CollectedHeap::ref_processing_init() about
@@ -1374,7 +1409,7 @@
 
     // Rebuild remembered sets of all regions.
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      int n_workers =
+      uint n_workers =
         AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
                                        workers()->active_workers(),
                                        Threads::number_of_non_daemon_threads());
@@ -1456,9 +1491,7 @@
   _hrs.verify_optional();
   verify_region_sets_optional();
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  print_heap_after_gc();
   g1mm()->update_sizes();
   post_full_gc_dump();
 
@@ -1838,11 +1871,14 @@
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
   _retained_old_gc_alloc_region(NULL),
+  _expand_heap_after_alloc_failure(true),
   _surviving_young_words(NULL),
   _full_collections_completed(0),
   _in_cset_fast_test(NULL),
   _in_cset_fast_test_base(NULL),
-  _dirty_cards_region_list(NULL) {
+  _dirty_cards_region_list(NULL),
+  _worker_cset_start_region(NULL),
+  _worker_cset_start_region_time_stamp(NULL) {
   _g1h = this; // To catch bugs.
   if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
     vm_exit_during_initialization("Failed necessary allocation.");
@@ -1863,12 +1899,17 @@
   }
   _rem_set_iterator = iter_arr;
 
+  _worker_cset_start_region = NEW_C_HEAP_ARRAY(HeapRegion*, n_queues);
+  _worker_cset_start_region_time_stamp = NEW_C_HEAP_ARRAY(unsigned int, n_queues);
+
   for (int i = 0; i < n_queues; i++) {
     RefToScanQueue* q = new RefToScanQueue();
     q->initialize();
     _task_queues->register_queue(i, q);
   }
 
+  clear_cset_start_regions();
+
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
 }
 
@@ -2271,9 +2312,12 @@
 }
 
 bool G1CollectedHeap::should_do_concurrent_full_gc(GCCause::Cause cause) {
-  return
-    ((cause == GCCause::_gc_locker           && GCLockerInvokesConcurrent) ||
-     (cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent));
+  switch (cause) {
+    case GCCause::_gc_locker:               return GCLockerInvokesConcurrent;
+    case GCCause::_java_lang_system_gc:     return ExplicitGCInvokesConcurrent;
+    case GCCause::_g1_humongous_allocation: return true;
+    default:                                return false;
+  }
 }
 
 #ifndef PRODUCT
@@ -2367,52 +2411,74 @@
 }
 
 void G1CollectedHeap::collect(GCCause::Cause cause) {
-  // The caller doesn't have the Heap_lock
-  assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock");
+  assert_heap_not_locked();
 
   unsigned int gc_count_before;
   unsigned int full_gc_count_before;
-  {
-    MutexLocker ml(Heap_lock);
-
-    // Read the GC count while holding the Heap_lock
-    gc_count_before = SharedHeap::heap()->total_collections();
-    full_gc_count_before = SharedHeap::heap()->total_full_collections();
-  }
-
-  if (should_do_concurrent_full_gc(cause)) {
-    // Schedule an initial-mark evacuation pause that will start a
-    // concurrent cycle. We're setting word_size to 0 which means that
-    // we are not requesting a post-GC allocation.
-    VM_G1IncCollectionPause op(gc_count_before,
-                               0,     /* word_size */
-                               true,  /* should_initiate_conc_mark */
-                               g1_policy()->max_pause_time_ms(),
-                               cause);
-    VMThread::execute(&op);
-  } else {
-    if (cause == GCCause::_gc_locker
-        DEBUG_ONLY(|| cause == GCCause::_scavenge_alot)) {
-
-      // Schedule a standard evacuation pause. We're setting word_size
-      // to 0 which means that we are not requesting a post-GC allocation.
+  bool retry_gc;
+
+  do {
+    retry_gc = false;
+
+    {
+      MutexLocker ml(Heap_lock);
+
+      // Read the GC count while holding the Heap_lock
+      gc_count_before = total_collections();
+      full_gc_count_before = total_full_collections();
+    }
+
+    if (should_do_concurrent_full_gc(cause)) {
+      // Schedule an initial-mark evacuation pause that will start a
+      // concurrent cycle. We're setting word_size to 0 which means that
+      // we are not requesting a post-GC allocation.
       VM_G1IncCollectionPause op(gc_count_before,
                                  0,     /* word_size */
-                                 false, /* should_initiate_conc_mark */
+                                 true,  /* should_initiate_conc_mark */
                                  g1_policy()->max_pause_time_ms(),
                                  cause);
       VMThread::execute(&op);
+      if (!op.pause_succeeded()) {
+        // Another GC got scheduled and prevented us from scheduling
+        // the initial-mark GC. It's unlikely that the GC that
+        // pre-empted us was also an initial-mark GC. So, we'll retry
+        // the initial-mark GC.
+
+        if (full_gc_count_before == total_full_collections()) {
+          retry_gc = true;
+        } else {
+          // A Full GC happened while we were trying to schedule the
+          // initial-mark GC. No point in starting a new cycle given
+          // that the whole heap was collected anyway.
+        }
+      }
     } else {
-      // Schedule a Full GC.
-      VM_G1CollectFull op(gc_count_before, full_gc_count_before, cause);
-      VMThread::execute(&op);
+      if (cause == GCCause::_gc_locker
+          DEBUG_ONLY(|| cause == GCCause::_scavenge_alot)) {
+
+        // Schedule a standard evacuation pause. We're setting word_size
+        // to 0 which means that we are not requesting a post-GC allocation.
+        VM_G1IncCollectionPause op(gc_count_before,
+                                   0,     /* word_size */
+                                   false, /* should_initiate_conc_mark */
+                                   g1_policy()->max_pause_time_ms(),
+                                   cause);
+        VMThread::execute(&op);
+      } else {
+        // Schedule a Full GC.
+        VM_G1CollectFull op(gc_count_before, full_gc_count_before, cause);
+        VMThread::execute(&op);
+      }
     }
-  }
+  } while (retry_gc);
 }
 
 bool G1CollectedHeap::is_in(const void* p) const {
-  HeapRegion* hr = _hrs.addr_to_region((HeapWord*) p);
-  if (hr != NULL) {
+  if (_g1_committed.contains(p)) {
+    // Given that we know that p is in the committed space,
+    // heap_region_containing_raw() should successfully
+    // return the containing region.
+    HeapRegion* hr = heap_region_containing_raw(p);
     return hr->is_in(p);
   } else {
     return _perm_gen->as_gen()->is_in(p);
@@ -2509,11 +2575,11 @@
 
 void
 G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
-                                                 int worker,
-                                                 int no_of_par_workers,
+                                                 uint worker,
+                                                 uint no_of_par_workers,
                                                  jint claim_value) {
   const size_t regions = n_regions();
-  const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+  const uint max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                              no_of_par_workers :
                              1);
   assert(UseDynamicNumberOfGCThreads ||
@@ -2595,12 +2661,16 @@
   }
 };
 
-void
-G1CollectedHeap::reset_heap_region_claim_values() {
+void G1CollectedHeap::reset_heap_region_claim_values() {
   ResetClaimValuesClosure blk;
   heap_region_iterate(&blk);
 }
 
+void G1CollectedHeap::reset_cset_heap_region_claim_values() {
+  ResetClaimValuesClosure blk;
+  collection_set_iterate(&blk);
+}
+
 #ifdef ASSERT
 // This checks whether all regions in the heap have the correct claim
 // value. I also piggy-backed on this a check to ensure that the
@@ -2684,25 +2754,80 @@
 }
 #endif // ASSERT
 
-// We want the parallel threads to start their collection
-// set iteration at different collection set regions to
-// avoid contention.
-// If we have:
-//          n collection set regions
-//          p threads
-// Then thread t will start at region t * floor (n/p)
-
+// Clear the cached CSet starting regions and (more importantly)
+// the time stamps. Called when we reset the GC time stamp.
+void G1CollectedHeap::clear_cset_start_regions() {
+  assert(_worker_cset_start_region != NULL, "sanity");
+  assert(_worker_cset_start_region_time_stamp != NULL, "sanity");
+
+  int n_queues = MAX2((int)ParallelGCThreads, 1);
+  for (int i = 0; i < n_queues; i++) {
+    _worker_cset_start_region[i] = NULL;
+    _worker_cset_start_region_time_stamp[i] = 0;
+  }
+}
+
+// Given the id of a worker, obtain or calculate a suitable
+// starting region for iterating over the current collection set.
 HeapRegion* G1CollectedHeap::start_cset_region_for_worker(int worker_i) {
-  HeapRegion* result = g1_policy()->collection_set();
+  assert(get_gc_time_stamp() > 0, "should have been updated by now");
+
+  HeapRegion* result = NULL;
+  unsigned gc_time_stamp = get_gc_time_stamp();
+
+  if (_worker_cset_start_region_time_stamp[worker_i] == gc_time_stamp) {
+    // Cached starting region for current worker was set
+    // during the current pause - so it's valid.
+    // Note: the cached starting heap region may be NULL
+    // (when the collection set is empty).
+    result = _worker_cset_start_region[worker_i];
+    assert(result == NULL || result->in_collection_set(), "sanity");
+    return result;
+  }
+
+  // The cached entry was not valid so let's calculate
+  // a suitable starting heap region for this worker.
+
+  // We want the parallel threads to start their collection
+  // set iteration at different collection set regions to
+  // avoid contention.
+  // If we have:
+  //          n collection set regions
+  //          p threads
+  // Then thread t will start at region floor ((t * n) / p)
+
+  result = g1_policy()->collection_set();
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     size_t cs_size = g1_policy()->cset_region_length();
-    int n_workers = workers()->total_workers();
-    size_t cs_spans = cs_size / n_workers;
-    size_t ind      = cs_spans * worker_i;
-    for (size_t i = 0; i < ind; i++) {
+    uint active_workers = workers()->active_workers();
+    assert(UseDynamicNumberOfGCThreads ||
+             active_workers == workers()->total_workers(),
+             "Unless dynamic should use total workers");
+
+    size_t end_ind   = (cs_size * worker_i) / active_workers;
+    size_t start_ind = 0;
+
+    if (worker_i > 0 &&
+        _worker_cset_start_region_time_stamp[worker_i - 1] == gc_time_stamp) {
+      // Previous worker's starting region is valid
+      // so let's iterate from there
+      start_ind = (cs_size * (worker_i - 1)) / active_workers;
+      result = _worker_cset_start_region[worker_i - 1];
+    }
+
+    for (size_t i = start_ind; i < end_ind; i++) {
       result = result->next_in_collection_set();
     }
   }
+
+  // Note: the calculated starting heap region may be NULL
+  // (when the collection set is empty).
+  assert(result == NULL || result->in_collection_set(), "sanity");
+  assert(_worker_cset_start_region_time_stamp[worker_i] != gc_time_stamp,
+         "should be updated only once per pause");
+  _worker_cset_start_region[worker_i] = result;
+  OrderAccess::storestore();
+  _worker_cset_start_region_time_stamp[worker_i] = gc_time_stamp;
   return result;
 }
 
@@ -2935,14 +3060,20 @@
       } else {
         VerifyObjsInRegionClosure not_dead_yet_cl(r, _vo);
         r->object_iterate(&not_dead_yet_cl);
-        if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
-          gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
-                                 "max_live_bytes "SIZE_FORMAT" "
-                                 "< calculated "SIZE_FORMAT,
-                                 r->bottom(), r->end(),
-                                 r->max_live_bytes(),
+        if (_vo != VerifyOption_G1UseNextMarking) {
+          if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
+            gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
+                                   "max_live_bytes "SIZE_FORMAT" "
+                                   "< calculated "SIZE_FORMAT,
+                                   r->bottom(), r->end(),
+                                   r->max_live_bytes(),
                                  not_dead_yet_cl.live_bytes());
-          _failures = true;
+            _failures = true;
+          }
+        } else {
+          // When vo == UseNextMarking we cannot currently do a sanity
+          // check on the live bytes as the calculation has not been
+          // finalized yet.
         }
       }
     }
@@ -3010,10 +3141,10 @@
     return _failures;
   }
 
-  void work(int worker_i) {
+  void work(uint worker_id) {
     HandleMark hm;
     VerifyRegionClosure blk(_allow_dirty, true, _vo);
-    _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
+    _g1h->heap_region_par_iterate_chunked(&blk, worker_id,
                                           _g1h->workers()->active_workers(),
                                           HeapRegion::ParVerifyClaimValue);
     if (blk.failures()) {
@@ -3040,12 +3171,12 @@
 
     // We apply the relevant closures to all the oops in the
     // system dictionary, the string table and the code cache.
-    const int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+    const int so = SO_AllClasses | SO_Strings | SO_CodeCache;
 
     process_strong_roots(true,      // activate StrongRootsScope
                          true,      // we set "collecting perm gen" to true,
                                     // so we don't reset the dirty cards in the perm gen.
-                         SharedHeap::ScanningOption(so),  // roots scanning options
+                         ScanningOption(so),  // roots scanning options
                          &rootsCl,
                          &blobsCl,
                          &rootsCl);
@@ -3316,16 +3447,6 @@
   }
 }
 
-double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr,
-                                                       bool young) {
-  return _g1_policy->predict_region_elapsed_time_ms(hr, young);
-}
-
-void G1CollectedHeap::check_if_region_is_too_expensive(double
-                                                           predicted_time_ms) {
-  _g1_policy->check_if_region_is_too_expensive(predicted_time_ms);
-}
-
 size_t G1CollectedHeap::pending_card_num() {
   size_t extra_cards = 0;
   JavaThread *curr = Threads::first();
@@ -3447,34 +3568,37 @@
   SvcGCMarker sgcm(SvcGCMarker::MINOR);
   ResourceMark rm;
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-  }
+  print_heap_before_gc();
 
   HRSPhaseSetter x(HRSPhaseEvacuation);
   verify_region_sets_optional();
   verify_dirty_young_regions();
 
+  // This call will decide whether this pause is an initial-mark
+  // pause. If it is, during_initial_mark_pause() will return true
+  // for the duration of this pause.
+  g1_policy()->decide_on_conc_mark_initiation();
+
+  // We do not allow initial-mark to be piggy-backed on a mixed GC.
+  assert(!g1_policy()->during_initial_mark_pause() ||
+          g1_policy()->gcs_are_young(), "sanity");
+
+  // We also do not allow mixed GCs during marking.
+  assert(!mark_in_progress() || g1_policy()->gcs_are_young(), "sanity");
+
+  // Record whether this pause is an initial mark. When the current
+  // thread has completed its logging output and it's safe to signal
+  // the CM thread, the flag's value in the policy has been reset.
+  bool should_start_conc_mark = g1_policy()->during_initial_mark_pause();
+
+  // Inner scope for scope based logging, timers, and stats collection
   {
-    // This call will decide whether this pause is an initial-mark
-    // pause. If it is, during_initial_mark_pause() will return true
-    // for the duration of this pause.
-    g1_policy()->decide_on_conc_mark_initiation();
-
-    // We do not allow initial-mark to be piggy-backed on a
-    // partially-young GC.
-    assert(!g1_policy()->during_initial_mark_pause() ||
-            g1_policy()->full_young_gcs(), "sanity");
-
-    // We also do not allow partially-young GCs during marking.
-    assert(!mark_in_progress() || g1_policy()->full_young_gcs(), "sanity");
-
     char verbose_str[128];
     sprintf(verbose_str, "GC pause ");
-    if (g1_policy()->full_young_gcs()) {
+    if (g1_policy()->gcs_are_young()) {
       strcat(verbose_str, "(young)");
     } else {
-      strcat(verbose_str, "(partial)");
+      strcat(verbose_str, "(mixed)");
     }
     if (g1_policy()->during_initial_mark_pause()) {
       strcat(verbose_str, " (initial-mark)");
@@ -3524,7 +3648,6 @@
         Universe::verify(/* allow dirty */ false,
                          /* silent      */ false,
                          /* option      */ VerifyOption_G1UsePrevMarking);
-
       }
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
@@ -3567,6 +3690,18 @@
         g1_policy()->record_collection_pause_start(start_time_sec,
                                                    start_used_bytes);
 
+        double scan_wait_start = os::elapsedTime();
+        // We have to wait until the CM threads finish scanning the
+        // root regions as it's the only way to ensure that all the
+        // objects on them have been correctly scanned before we start
+        // moving them during the GC.
+        bool waited = _cm->root_regions()->wait_until_scan_finished();
+        if (waited) {
+          double scan_wait_end = os::elapsedTime();
+          double wait_time_ms = (scan_wait_end - scan_wait_start) * 1000.0;
+          g1_policy()->record_root_region_scan_wait_time(wait_time_ms);
+        }
+
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nAfter recording pause start.\nYoung_list:");
         _young_list->print();
@@ -3577,32 +3712,23 @@
         }
         perm_gen()->save_marks();
 
-        // We must do this before any possible evacuation that should propagate
-        // marks.
-        if (mark_in_progress()) {
-          double start_time_sec = os::elapsedTime();
-
-          _cm->drainAllSATBBuffers();
-          double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
-          g1_policy()->record_satb_drain_time(finish_mark_ms);
-        }
-        // Record the number of elements currently on the mark stack, so we
-        // only iterate over these.  (Since evacuation may add to the mark
-        // stack, doing more exposes race conditions.)  If no mark is in
-        // progress, this will be zero.
-        _cm->set_oops_do_bound();
-
-        if (mark_in_progress()) {
-          concurrent_mark()->newCSet();
-        }
-
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nBefore choosing collection set.\nYoung_list:");
         _young_list->print();
         g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty);
 #endif // YOUNG_LIST_VERBOSE
 
-        g1_policy()->choose_collection_set(target_pause_time_ms);
+        g1_policy()->finalize_cset(target_pause_time_ms);
+
+        _cm->note_start_of_gc();
+        // We should not verify the per-thread SATB buffers given that
+        // we have not filtered them yet (we'll do so during the
+        // GC). We also call this after finalize_cset() to
+        // ensure that the CSet has been finalized.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 false /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
 
         if (_hr_printer.is_active()) {
           HeapRegion* hr = g1_policy()->collection_set();
@@ -3620,16 +3746,6 @@
           }
         }
 
-        // We have chosen the complete collection set. If marking is
-        // active then, we clear the region fields of any of the
-        // concurrent marking tasks whose region fields point into
-        // the collection set as these values will become stale. This
-        // will cause the owning marking threads to claim a new region
-        // when marking restarts.
-        if (mark_in_progress()) {
-          concurrent_mark()->reset_active_task_region_fields_in_cset();
-        }
-
 #ifdef ASSERT
         VerifyCSetClosure cl;
         collection_set_iterate(&cl);
@@ -3643,6 +3759,16 @@
         // Actually do the work...
         evacuate_collection_set();
 
+        // We do this mainly to verify the per-thread SATB buffers
+        // (which have been filtered by now) since we didn't verify
+        // them earlier. No point in re-checking the stacks / enqueued
+        // buffers given that the CSet has not changed since last time
+        // we checked.
+        _cm->verify_no_cset_oops(false /* verify_stacks */,
+                                 false /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+
         free_collection_set(g1_policy()->collection_set());
         g1_policy()->clear_collection_set();
 
@@ -3685,16 +3811,14 @@
         }
 
         if (g1_policy()->during_initial_mark_pause()) {
+          // We have to do this before we notify the CM threads that
+          // they can start working to make sure that all the
+          // appropriate initialization is done on the CM object.
           concurrent_mark()->checkpointRootsInitialPost();
           set_marking_started();
-          // CAUTION: after the doConcurrentMark() call below,
-          // the concurrent marking thread(s) could be running
-          // concurrently with us. Make sure that anything after
-          // this point does not assume that we are the only GC thread
-          // running. Note: of course, the actual marking work will
-          // not start until the safepoint itself is released in
-          // ConcurrentGCThread::safepoint_desynchronize().
-          doConcurrentMark();
+          // Note that we don't actually trigger the CM thread at
+          // this point. We do that later when we're sure that
+          // the current thread has completed its logging output.
         }
 
         allocate_dummy_regions();
@@ -3711,6 +3835,8 @@
           size_t expand_bytes = g1_policy()->expansion_amount();
           if (expand_bytes > 0) {
             size_t bytes_before = capacity();
+            // No need for an ergo verbose message here,
+            // expansion_amount() does this when it returns a value > 0.
             if (!expand(expand_bytes)) {
               // We failed to expand the heap so let's verify that
               // committed/uncommitted amount match the backing store
@@ -3720,11 +3846,20 @@
           }
         }
 
+        // We redo the verification but now wrt the new CSet which
+        // has just got initialized after the previous CSet was freed.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+        _cm->note_end_of_gc();
+
         double end_time_sec = os::elapsedTime();
         double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
         g1_policy()->record_pause_time_ms(pause_time_ms);
-        int active_gc_threads = workers()->active_workers();
-        g1_policy()->record_collection_pause_end(active_gc_threads);
+        int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                                workers()->active_workers() : 1);
+        g1_policy()->record_collection_pause_end(active_workers);
 
         MemoryService::track_memory_usage();
 
@@ -3766,21 +3901,6 @@
         // CM reference discovery will be re-enabled if necessary.
       }
 
-      {
-        size_t expand_bytes = g1_policy()->expansion_amount();
-        if (expand_bytes > 0) {
-          size_t bytes_before = capacity();
-          // No need for an ergo verbose message here,
-          // expansion_amount() does this when it returns a value > 0.
-          if (!expand(expand_bytes)) {
-            // We failed to expand the heap so let's verify that
-            // committed/uncommitted amount match the backing store
-            assert(capacity() == _g1_storage.committed_size(), "committed size mismatch");
-            assert(max_capacity() == _g1_storage.reserved_size(), "reserved size mismatch");
-          }
-        }
-      }
-
       // We should do this after we potentially expand the heap so
       // that all the COMMIT events are generated before the end GC
       // event, and after we retire the GC alloc regions so that all
@@ -3808,15 +3928,22 @@
     }
   }
 
+  // The closing of the inner scope, immediately above, will complete
+  // the PrintGC logging output. The record_collection_pause_end() call
+  // above will complete the logging output of PrintGCDetails.
+  //
+  // It is not yet safe, however, to tell the concurrent mark to
+  // start as we have some optional output below. We don't want the
+  // output from the concurrent mark thread interfering with this
+  // logging output either.
+
   _hrs.verify_optional();
   verify_region_sets_optional();
 
   TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats());
   TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  print_heap_after_gc();
   g1mm()->update_sizes();
 
   if (G1SummarizeRSetStats &&
@@ -3825,6 +3952,21 @@
     g1_rem_set()->print_summary_info();
   }
 
+  // It should now be safe to tell the concurrent mark thread to start
+  // without its logging output interfering with the logging output
+  // that came from the pause.
+
+  if (should_start_conc_mark) {
+    // CAUTION: after the doConcurrentMark() call below,
+    // the concurrent marking thread(s) could be running
+    // concurrently with us. Make sure that anything after
+    // this point does not assume that we are the only GC thread
+    // running. Note: of course, the actual marking work will
+    // not start until the safepoint itself is released in
+    // ConcurrentGCThread::safepoint_desynchronize().
+    doConcurrentMark();
+  }
+
   return true;
 }
 
@@ -3884,6 +4026,8 @@
     // we allocate to in the region sets. We'll re-add it later, when
     // it's retired again.
     _old_set.remove(retained_region);
+    bool during_im = g1_policy()->during_initial_mark_pause();
+    retained_region->note_start_of_copying(during_im);
     _old_gc_alloc_region.set(retained_region);
     _hr_printer.reuse(retained_region);
   }
@@ -3920,157 +4064,26 @@
   _evac_failure_scan_stack = NULL;
 }
 
-class UpdateRSetDeferred : public OopsInHeapRegionClosure {
-private:
-  G1CollectedHeap* _g1;
-  DirtyCardQueue *_dcq;
-  CardTableModRefBS* _ct_bs;
-
-public:
-  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
-    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    assert(_from->is_in_reserved(p), "paranoia");
-    if (!_from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) &&
-        !_from->is_survivor()) {
-      size_t card_index = _ct_bs->index_for(p);
-      if (_ct_bs->mark_card_deferred(card_index)) {
-        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
-      }
-    }
-  }
-};
-
-class RemoveSelfPointerClosure: public ObjectClosure {
-private:
-  G1CollectedHeap* _g1;
-  ConcurrentMark* _cm;
-  HeapRegion* _hr;
-  size_t _prev_marked_bytes;
-  size_t _next_marked_bytes;
-  OopsInHeapRegionClosure *_cl;
-public:
-  RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr,
-                           OopsInHeapRegionClosure* cl) :
-    _g1(g1), _hr(hr), _cm(_g1->concurrent_mark()),  _prev_marked_bytes(0),
-    _next_marked_bytes(0), _cl(cl) {}
-
-  size_t prev_marked_bytes() { return _prev_marked_bytes; }
-  size_t next_marked_bytes() { return _next_marked_bytes; }
-
-  // <original comment>
-  // The original idea here was to coalesce evacuated and dead objects.
-  // However that caused complications with the block offset table (BOT).
-  // In particular if there were two TLABs, one of them partially refined.
-  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
-  // The BOT entries of the unrefined part of TLAB_2 point to the start
-  // of TLAB_2. If the last object of the TLAB_1 and the first object
-  // of TLAB_2 are coalesced, then the cards of the unrefined part
-  // would point into middle of the filler object.
-  // The current approach is to not coalesce and leave the BOT contents intact.
-  // </original comment>
-  //
-  // We now reset the BOT when we start the object iteration over the
-  // region and refine its entries for every object we come across. So
-  // the above comment is not really relevant and we should be able
-  // to coalesce dead objects if we want to.
-  void do_object(oop obj) {
-    HeapWord* obj_addr = (HeapWord*) obj;
-    assert(_hr->is_in(obj_addr), "sanity");
-    size_t obj_size = obj->size();
-    _hr->update_bot_for_object(obj_addr, obj_size);
-    if (obj->is_forwarded() && obj->forwardee() == obj) {
-      // The object failed to move.
-      assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
-      _cm->markPrev(obj);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
-      _prev_marked_bytes += (obj_size * HeapWordSize);
-      if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
-        _cm->markAndGrayObjectIfNecessary(obj);
-      }
-      obj->set_mark(markOopDesc::prototype());
-      // While we were processing RSet buffers during the
-      // collection, we actually didn't scan any cards on the
-      // collection set, since we didn't want to update remebered
-      // sets with entries that point into the collection set, given
-      // that live objects fromthe collection set are about to move
-      // and such entries will be stale very soon. This change also
-      // dealt with a reliability issue which involved scanning a
-      // card in the collection set and coming across an array that
-      // was being chunked and looking malformed. The problem is
-      // that, if evacuation fails, we might have remembered set
-      // entries missing given that we skipped cards on the
-      // collection set. So, we'll recreate such entries now.
-      obj->oop_iterate(_cl);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
-    } else {
-      // The object has been either evacuated or is dead. Fill it with a
-      // dummy object.
-      MemRegion mr((HeapWord*)obj, obj_size);
-      CollectedHeap::fill_with_object(mr);
-      _cm->clearRangeBothMaps(mr);
-    }
-  }
-};
-
 void G1CollectedHeap::remove_self_forwarding_pointers() {
-  UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
-  DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
-  UpdateRSetDeferred deferred_update(_g1h, &dcq);
-  OopsInHeapRegionClosure *cl;
-  if (G1DeferredRSUpdate) {
-    cl = &deferred_update;
+  assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
+  assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
+
+  G1ParRemoveSelfForwardPtrsTask rsfp_task(this);
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    set_par_threads();
+    workers()->run_task(&rsfp_task);
+    set_par_threads(0);
   } else {
-    cl = &immediate_update;
-  }
-  HeapRegion* cur = g1_policy()->collection_set();
-  while (cur != NULL) {
-    assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
-    assert(!cur->isHumongous(), "sanity");
-
-    if (cur->evacuation_failed()) {
-      assert(cur->in_collection_set(), "bad CS");
-      RemoveSelfPointerClosure rspc(_g1h, cur, cl);
-
-      // In the common case we make sure that this is done when the
-      // region is freed so that it is "ready-to-go" when it's
-      // re-allocated. However, when evacuation failure happens, a
-      // region will remain in the heap and might ultimately be added
-      // to a CSet in the future. So we have to be careful here and
-      // make sure the region's RSet is ready for parallel iteration
-      // whenever this might be required in the future.
-      cur->rem_set()->reset_for_par_iteration();
-      cur->reset_bot();
-      cl->set_region(cur);
-      cur->object_iterate(&rspc);
-
-      // A number of manipulations to make the TAMS be the current top,
-      // and the marked bytes be the ones observed in the iteration.
-      if (_g1h->concurrent_mark()->at_least_one_mark_complete()) {
-        // The comments below are the postconditions achieved by the
-        // calls.  Note especially the last such condition, which says that
-        // the count of marked bytes has been properly restored.
-        cur->note_start_of_marking(false);
-        // _next_top_at_mark_start == top, _next_marked_bytes == 0
-        cur->add_to_marked_bytes(rspc.prev_marked_bytes());
-        // _next_marked_bytes == prev_marked_bytes.
-        cur->note_end_of_marking();
-        // _prev_top_at_mark_start == top(),
-        // _prev_marked_bytes == prev_marked_bytes
-      }
-      // If there is no mark in progress, we modified the _next variables
-      // above needlessly, but harmlessly.
-      if (_g1h->mark_in_progress()) {
-        cur->note_start_of_marking(false);
-        // _next_top_at_mark_start == top, _next_marked_bytes == 0
-        // _next_marked_bytes == next_marked_bytes.
-      }
-    }
-    cur = cur->next_in_collection_set();
-  }
+    rsfp_task.work(0);
+  }
+
+  assert(check_cset_heap_region_claim_values(HeapRegion::ParEvacFailureClaimValue), "sanity");
+
+  // Reset the claim values in the regions in the collection set.
+  reset_cset_heap_region_claim_values();
+
+  assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
   assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
 
   // Now restore saved marks, if any.
@@ -4083,6 +4096,7 @@
       markOop m = _preserved_marks_of_objs->at(i);
       obj->set_mark(m);
     }
+
     // Delete the preserved marks growable arrays (allocated on the C heap).
     delete _objs_with_preserved_marks;
     delete _preserved_marks_of_objs;
@@ -4107,8 +4121,7 @@
 
 oop
 G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
-                                               oop old,
-                                               bool should_mark_root) {
+                                               oop old) {
   assert(obj_in_cs(old),
          err_msg("obj: "PTR_FORMAT" should still be in the CSet",
                  (HeapWord*) old));
@@ -4117,15 +4130,6 @@
   if (forward_ptr == NULL) {
     // Forward-to-self succeeded.
 
-    // should_mark_root will be true when this routine is called
-    // from a root scanning closure during an initial mark pause.
-    // In this case the thread that succeeds in self-forwarding the
-    // object is also responsible for marking the object.
-    if (should_mark_root) {
-      assert(!oopDesc::is_null(old), "shouldn't be");
-      _cm->grayRoot(old);
-    }
-
     if (_evac_failure_closure != cl) {
       MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
       assert(!_drain_in_progress,
@@ -4221,32 +4225,10 @@
   return NULL;
 }
 
-#ifndef PRODUCT
-bool GCLabBitMapClosure::do_bit(size_t offset) {
-  HeapWord* addr = _bitmap->offsetToHeapWord(offset);
-  guarantee(_cm->isMarked(oop(addr)), "it should be!");
-  return true;
-}
-#endif // PRODUCT
-
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
-  ParGCAllocBuffer(gclab_word_size),
-  _should_mark_objects(false),
-  _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
-  _retired(false)
-{
-  //_should_mark_objects is set to true when G1ParCopyHelper needs to
-  // mark the forwarded location of an evacuated object.
-  // We set _should_mark_objects to true if marking is active, i.e. when we
-  // need to propagate a mark, or during an initial mark pause, i.e. when we
-  // need to mark objects immediately reachable by the roots.
-  if (G1CollectedHeap::heap()->mark_in_progress() ||
-      G1CollectedHeap::heap()->g1_policy()->during_initial_mark_pause()) {
-    _should_mark_objects = true;
-  }
-}
-
-G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
+  ParGCAllocBuffer(gclab_word_size), _retired(false) { }
+
+G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num)
   : _g1h(g1h),
     _refs(g1h->task_queue(queue_num)),
     _dcq(&g1h->dirty_card_queue_set()),
@@ -4258,8 +4240,7 @@
     _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
     _age_table(false),
     _strong_roots_time(0), _term_time(0),
-    _alloc_buffer_waste(0), _undo_waste(0)
-{
+    _alloc_buffer_waste(0), _undo_waste(0) {
   // we allocate G1YoungSurvRateNumRegions plus one entries, since
   // we "sacrifice" entry 0 to keep track of surviving bytes for
   // non-young regions (where the age is -1)
@@ -4364,35 +4345,54 @@
   } while (!refs()->is_empty());
 }
 
-G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1,
+                                     G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state),
+  _worker_id(par_scan_state->queue_num()),
   _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
   _mark_in_progress(_g1->mark_in_progress()) { }
 
-template <class T> void G1ParCopyHelper::mark_object(T* p) {
-  // This is called from do_oop_work for objects that are not
-  // in the collection set. Objects in the collection set
-  // are marked after they have been evacuated.
-
-  T heap_oop = oopDesc::load_heap_oop(p);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop(heap_oop);
-    HeapWord* addr = (HeapWord*)obj;
-    if (_g1->is_in_g1_reserved(addr)) {
-      _cm->grayRoot(oop(addr));
-    }
-  }
-}
-
-oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_root,
-                                                     bool should_mark_copy) {
+void G1ParCopyHelper::mark_object(oop obj) {
+#ifdef ASSERT
+  HeapRegion* hr = _g1->heap_region_containing(obj);
+  assert(hr != NULL, "sanity");
+  assert(!hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // We know that the object is not moving so it's safe to read its size.
+  _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
+}
+
+void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
+#ifdef ASSERT
+  assert(from_obj->is_forwarded(), "from obj should be forwarded");
+  assert(from_obj->forwardee() == to_obj, "to obj should be the forwardee");
+  assert(from_obj != to_obj, "should not be self-forwarded");
+
+  HeapRegion* from_hr = _g1->heap_region_containing(from_obj);
+  assert(from_hr != NULL, "sanity");
+  assert(from_hr->in_collection_set(), "from obj should be in the CSet");
+
+  HeapRegion* to_hr = _g1->heap_region_containing(to_obj);
+  assert(to_hr != NULL, "sanity");
+  assert(!to_hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // The object might be in the process of being copied by another
+  // worker so we cannot trust that its to-space image is
+  // well-formed. So we have to read its size from its from-space
+  // image which we know should not be changing.
+  _cm->grayRoot(to_obj, (size_t) from_obj->size(), _worker_id);
+}
+
+oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
   size_t    word_sz = old->size();
   HeapRegion* from_region = _g1->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
   int       young_index = from_region->young_index_in_cset()+1;
-  assert( (from_region->is_young() && young_index > 0) ||
-          (!from_region->is_young() && young_index == 0), "invariant" );
+  assert( (from_region->is_young() && young_index >  0) ||
+         (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1->g1_policy();
   markOop m = old->mark();
   int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
@@ -4406,7 +4406,7 @@
     // This will either forward-to-self, or detect that someone else has
     // installed a forwarding pointer.
     OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
-    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root);
+    return _g1->handle_evacuation_failure_par(cl, old);
   }
 
   // We're going to allocate linearly, so might as well prefetch ahead.
@@ -4442,28 +4442,14 @@
       obj->set_mark(m);
     }
 
-    // Mark the evacuated object or propagate "next" mark bit
-    if (should_mark_copy) {
-      if (!use_local_bitmaps ||
-          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
-        // if we couldn't mark it on the local bitmap (this happens when
-        // the object was not allocated in the GCLab), we have to bite
-        // the bullet and do the standard parallel mark
-        _cm->markAndGrayObjectIfNecessary(obj);
-      }
-
-      if (_g1->isMarkedNext(old)) {
-        // Unmark the object's old location so that marking
-        // doesn't think the old object is alive.
-        _cm->nextMarkBitMap()->parClear((HeapWord*)old);
-      }
-    }
-
     size_t* surv_young_words = _par_scan_state->surviving_young_words();
     surv_young_words[young_index] += word_sz;
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
-      arrayOop(old)->set_length(0);
+      // We keep track of the next start index in the length field of
+      // the to-space object. The actual length can be found in the
+      // length field of the from-space object.
+      arrayOop(obj)->set_length(0);
       oop* old_p = set_partial_array_mask(old);
       _par_scan_state->push_on_queue(old_p);
     } else {
@@ -4485,76 +4471,41 @@
 ::do_oop_work(T* p) {
   oop obj = oopDesc::load_decode_heap_oop(p);
   assert(barrier != G1BarrierRS || obj != NULL,
-         "Precondition: G1BarrierRS implies obj is nonNull");
-
-  // Marking:
-  // If the object is in the collection set, then the thread
-  // that copies the object should mark, or propagate the
-  // mark to, the evacuated object.
-  // If the object is not in the collection set then we
-  // should call the mark_object() method depending on the
-  // value of the template parameter do_mark_object (which will
-  // be true for root scanning closures during an initial mark
-  // pause).
-  // The mark_object() method first checks whether the object
-  // is marked and, if not, attempts to mark the object.
+         "Precondition: G1BarrierRS implies obj is non-NULL");
+
+  assert(_worker_id == _par_scan_state->queue_num(), "sanity");
 
   // here the null check is implicit in the cset_fast_test() test
   if (_g1->in_cset_fast_test(obj)) {
+    oop forwardee;
     if (obj->is_forwarded()) {
-      oopDesc::encode_store_heap_oop(p, obj->forwardee());
-      // If we are a root scanning closure during an initial
-      // mark pause (i.e. do_mark_object will be true) then
-      // we also need to handle marking of roots in the
-      // event of an evacuation failure. In the event of an
-      // evacuation failure, the object is forwarded to itself
-      // and not copied. For root-scanning closures, the
-      // object would be marked after a successful self-forward
-      // but an object could be pointed to by both a root and non
-      // root location and be self-forwarded by a non-root-scanning
-      // closure. Therefore we also have to attempt to mark the
-      // self-forwarded root object here.
-      if (do_mark_object && obj->forwardee() == obj) {
-        mark_object(p);
-      }
+      forwardee = obj->forwardee();
     } else {
-      // During an initial mark pause, objects that are pointed to
-      // by the roots need to be marked - even in the event of an
-      // evacuation failure. We pass the template parameter
-      // do_mark_object (which is true for root scanning closures
-      // during an initial mark pause) to copy_to_survivor_space
-      // which will pass it on to the evacuation failure handling
-      // code. The thread that successfully self-forwards a root
-      // object to itself is responsible for marking the object.
-      bool should_mark_root = do_mark_object;
-
-      // We need to mark the copied object if we're a root scanning
-      // closure during an initial mark pause (i.e. do_mark_object
-      // will be true), or the object is already marked and we need
-      // to propagate the mark to the evacuated copy.
-      bool should_mark_copy = do_mark_object ||
-                              _during_initial_mark ||
-                              (_mark_in_progress && !_g1->is_obj_ill(obj));
-
-      oop copy_oop = copy_to_survivor_space(obj, should_mark_root,
-                                                 should_mark_copy);
-      oopDesc::encode_store_heap_oop(p, copy_oop);
+      forwardee = copy_to_survivor_space(obj);
     }
+    assert(forwardee != NULL, "forwardee should not be NULL");
+    oopDesc::encode_store_heap_oop(p, forwardee);
+    if (do_mark_object && forwardee != obj) {
+      // If the object is self-forwarded we don't need to explicitly
+      // mark it, the evacuation failure protocol will do so.
+      mark_forwarded_object(obj, forwardee);
+    }
+
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
-      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+      _par_scan_state->update_rs(_from, p, _worker_id);
     }
   } else {
     // The object is not in collection set. If we're a root scanning
     // closure during an initial mark pause (i.e. do_mark_object will
     // be true) then attempt to mark the object.
-    if (do_mark_object) {
-      mark_object(p);
+    if (do_mark_object && _g1->is_in_g1_reserved(obj)) {
+      mark_object(obj);
     }
   }
 
   if (barrier == G1BarrierEvac && obj != NULL) {
-    _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+    _par_scan_state->update_rs(_from, p, _worker_id);
   }
 
   if (do_gen_barrier && obj != NULL) {
@@ -4567,35 +4518,51 @@
 
 template <class T> void G1ParScanPartialArrayClosure::do_oop_nv(T* p) {
   assert(has_partial_array_mask(p), "invariant");
-  oop old = clear_partial_array_mask(p);
-  assert(old->is_objArray(), "must be obj array");
-  assert(old->is_forwarded(), "must be forwarded");
-  assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
-
-  objArrayOop obj = objArrayOop(old->forwardee());
-  assert((void*)old != (void*)old->forwardee(), "self forwarding here?");
-  // Process ParGCArrayScanChunk elements now
-  // and push the remainder back onto queue
-  int start     = arrayOop(old)->length();
-  int end       = obj->length();
-  int remainder = end - start;
-  assert(start <= end, "just checking");
+  oop from_obj = clear_partial_array_mask(p);
+
+  assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
+  assert(from_obj->is_objArray(), "must be obj array");
+  objArrayOop from_obj_array = objArrayOop(from_obj);
+  // The from-space object contains the real length.
+  int length                 = from_obj_array->length();
+
+  assert(from_obj->is_forwarded(), "must be forwarded");
+  oop to_obj                 = from_obj->forwardee();
+  assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
+  objArrayOop to_obj_array   = objArrayOop(to_obj);
+  // We keep track of the next start index in the length field of the
+  // to-space object.
+  int next_index             = to_obj_array->length();
+  assert(0 <= next_index && next_index < length,
+         err_msg("invariant, next index: %d, length: %d", next_index, length));
+
+  int start                  = next_index;
+  int end                    = length;
+  int remainder              = end - start;
+  // We'll try not to push a range that's smaller than ParGCArrayScanChunk.
   if (remainder > 2 * ParGCArrayScanChunk) {
-    // Test above combines last partial chunk with a full chunk
     end = start + ParGCArrayScanChunk;
-    arrayOop(old)->set_length(end);
-    // Push remainder.
-    oop* old_p = set_partial_array_mask(old);
-    assert(arrayOop(old)->length() < obj->length(), "Empty push?");
-    _par_scan_state->push_on_queue(old_p);
+    to_obj_array->set_length(end);
+    // Push the remainder before we process the range in case another
+    // worker has run out of things to do and can steal it.
+    oop* from_obj_p = set_partial_array_mask(from_obj);
+    _par_scan_state->push_on_queue(from_obj_p);
   } else {
-    // Restore length so that the heap remains parsable in
-    // case of evacuation failure.
-    arrayOop(old)->set_length(end);
-  }
-  _scanner.set_region(_g1->heap_region_containing_raw(obj));
-  // process our set of indices (include header in first chunk)
-  obj->oop_iterate_range(&_scanner, start, end);
+    assert(length == end, "sanity");
+    // We'll process the final range for this object. Restore the length
+    // so that the heap remains parsable in case of evacuation failure.
+    to_obj_array->set_length(end);
+  }
+  _scanner.set_region(_g1->heap_region_containing_raw(to_obj));
+  // Process indexes [start,end). It will also process the header
+  // along with the first chunk (i.e., the chunk with start == 0).
+  // Note that at this point the length field of to_obj_array is not
+  // correct given that we are using it to keep track of the next
+  // start index. oop_iterate_range() (thankfully!) ignores the length
+  // field and only relies on the start / end parameters.  It does
+  // however return the size of the object which will be incorrect. So
+  // we have to ignore it even if we wanted to use it.
+  to_obj_array->oop_iterate_range(&_scanner, start, end);
 }
 
 class G1ParEvacuateFollowersClosure : public VoidClosure {
@@ -4660,7 +4627,7 @@
   G1CollectedHeap*       _g1h;
   RefToScanQueueSet      *_queues;
   ParallelTaskTerminator _terminator;
-  int _n_workers;
+  uint _n_workers;
 
   Mutex _stats_lock;
   Mutex* stats_lock() { return &_stats_lock; }
@@ -4700,18 +4667,18 @@
     _n_workers = active_workers;
   }
 
-  void work(int i) {
-    if (i >= _n_workers) return;  // no work needed this round
+  void work(uint worker_id) {
+    if (worker_id >= _n_workers) return;  // no work needed this round
 
     double start_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->record_gc_worker_start_time(i, start_time_ms);
+    _g1h->g1_policy()->record_gc_worker_start_time(worker_id, start_time_ms);
 
     ResourceMark rm;
     HandleMark   hm;
 
     ReferenceProcessor*             rp = _g1h->ref_processor_stw();
 
-    G1ParScanThreadState            pss(_g1h, i);
+    G1ParScanThreadState            pss(_g1h, worker_id);
     G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
     G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
@@ -4743,7 +4710,7 @@
                                   scan_root_cl,
                                   &push_heap_rs_cl,
                                   scan_perm_cl,
-                                  i);
+                                  worker_id);
     pss.end_strong_roots();
 
     {
@@ -4752,8 +4719,8 @@
       evac.do_void();
       double elapsed_ms = (os::elapsedTime()-start)*1000.0;
       double term_ms = pss.term_time()*1000.0;
-      _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms);
-      _g1h->g1_policy()->record_termination(i, term_ms, pss.term_attempts());
+      _g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
+      _g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
     }
     _g1h->g1_policy()->record_thread_age_table(pss.age_table());
     _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
@@ -4763,12 +4730,12 @@
 
     if (ParallelGCVerbose) {
       MutexLocker x(stats_lock());
-      pss.print_termination_stats(i);
+      pss.print_termination_stats(worker_id);
     }
 
     assert(pss.refs()->is_empty(), "should be empty");
     double end_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->record_gc_worker_end_time(i, end_time_ms);
+    _g1h->g1_policy()->record_gc_worker_end_time(worker_id, end_time_ms);
   }
 };
 
@@ -4779,7 +4746,7 @@
 void
 G1CollectedHeap::
 g1_process_strong_roots(bool collecting_perm_gen,
-                        SharedHeap::ScanningOption so,
+                        ScanningOption so,
                         OopClosure* scan_non_heap_roots,
                         OopsInHeapRegionClosure* scan_rs,
                         OopsInGenClosure* scan_perm,
@@ -4828,12 +4795,16 @@
 
   g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
 
-  // Scan strong roots in mark stack.
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
-    concurrent_mark()->oops_do(scan_non_heap_roots);
-  }
-  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
-  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
+  // During conc marking we have to filter the per-thread SATB buffers
+  // to make sure we remove any oops into the CSet (which will show up
+  // as implicitly live).
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
+    if (mark_in_progress()) {
+      JavaThread::satb_mark_queue_set().filter_thread_buffers();
+    }
+  }
+  double satb_filtering_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
+  g1_policy()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
   // Now scan the complement of the collection set.
   if (scan_rs != NULL) {
@@ -5026,14 +4997,14 @@
     _terminator(terminator)
   {}
 
-  virtual void work(int i) {
+  virtual void work(uint worker_id) {
     // The reference processing task executed by a single worker.
     ResourceMark rm;
     HandleMark   hm;
 
     G1STWIsAliveClosure is_alive(_g1h);
 
-    G1ParScanThreadState pss(_g1h, i);
+    G1ParScanThreadState pss(_g1h, worker_id);
 
     G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
@@ -5065,7 +5036,7 @@
     G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator);
 
     // Call the reference processing task's work routine.
-    _proc_task.work(i, is_alive, keep_alive, drain_queue);
+    _proc_task.work(worker_id, is_alive, keep_alive, drain_queue);
 
     // Note we cannot assert that the refs array is empty here as not all
     // of the processing tasks (specifically phase2 - pp2_work) execute
@@ -5100,8 +5071,8 @@
     _enq_task(enq_task)
   { }
 
-  virtual void work(int i) {
-    _enq_task.work(i);
+  virtual void work(uint worker_id) {
+    _enq_task.work(worker_id);
   }
 };
 
@@ -5130,7 +5101,7 @@
   G1CollectedHeap* _g1h;
   RefToScanQueueSet      *_queues;
   ParallelTaskTerminator _terminator;
-  int _n_workers;
+  uint _n_workers;
 
 public:
   G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h,int workers, RefToScanQueueSet *task_queues) :
@@ -5141,11 +5112,11 @@
     _n_workers(workers)
   { }
 
-  void work(int i) {
+  void work(uint worker_id) {
     ResourceMark rm;
     HandleMark   hm;
 
-    G1ParScanThreadState            pss(_g1h, i);
+    G1ParScanThreadState            pss(_g1h, worker_id);
     G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, NULL);
     G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
     G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, NULL);
@@ -5181,17 +5152,17 @@
 
     ReferenceProcessor* rp = _g1h->ref_processor_cm();
 
-    int limit = ReferenceProcessor::number_of_subclasses_of_ref() * rp->max_num_q();
-    int stride = MIN2(MAX2(_n_workers, 1), limit);
+    uint limit = ReferenceProcessor::number_of_subclasses_of_ref() * rp->max_num_q();
+    uint stride = MIN2(MAX2(_n_workers, 1U), limit);
 
     // limit is set using max_num_q() - which was set using ParallelGCThreads.
     // So this must be true - but assert just in case someone decides to
     // change the worker ids.
-    assert(0 <= i && i < limit, "sanity");
+    assert(0 <= worker_id && worker_id < limit, "sanity");
     assert(!rp->discovery_is_atomic(), "check this code");
 
     // Select discovered lists [i, i+stride, i+2*stride,...,limit)
-    for (int idx = i; idx < limit; idx += stride) {
+    for (uint idx = worker_id; idx < limit; idx += stride) {
       DiscoveredList& ref_list = rp->discovered_refs()[idx];
 
       DiscoveredListIterator iter(ref_list, &keep_alive, &always_alive);
@@ -5245,11 +5216,13 @@
   // referents points to another object which is also referenced by an
   // object discovered by the STW ref processor.
 
-  int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+  uint active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                         workers()->active_workers() : 1);
 
-  assert(active_workers == workers()->active_workers(),
-         "Need to reset active_workers");
+  assert(!G1CollectedHeap::use_parallel_gc_threads() ||
+           active_workers == workers()->active_workers(),
+           "Need to reset active_workers");
+
   set_par_threads(active_workers);
   G1ParPreserveCMReferentsTask keep_cm_referents(this, active_workers, _task_queues);
 
@@ -5349,7 +5322,7 @@
   } else {
     // Parallel reference enqueuing
 
-    int active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1);
+    uint active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1);
     assert(active_workers == workers()->active_workers(),
            "Need to reset active_workers");
     assert(rp->num_q() == active_workers, "sanity");
@@ -5372,13 +5345,14 @@
 }
 
 void G1CollectedHeap::evacuate_collection_set() {
+  _expand_heap_after_alloc_failure = true;
   set_evacuation_failed(false);
 
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
   concurrent_g1_refine()->set_use_cache(false);
   concurrent_g1_refine()->clear_hot_cache_claimed_index();
 
-  int n_workers;
+  uint n_workers;
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     n_workers =
       AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
@@ -5387,13 +5361,13 @@
     assert(UseDynamicNumberOfGCThreads ||
            n_workers == workers()->total_workers(),
            "If not dynamic should be using all the  workers");
+    workers()->set_active_workers(n_workers);
     set_par_threads(n_workers);
   } else {
     assert(n_par_threads() == 0,
            "Should be the original non-parallel value");
     n_workers = 1;
   }
-  workers()->set_active_workers(n_workers);
 
   G1ParTask g1_par_task(this, _task_queues);
 
@@ -5415,6 +5389,7 @@
     workers()->run_task(&g1_par_task);
   } else {
     StrongRootsScope srs(this);
+    g1_par_task.set_for_termination(n_workers);
     g1_par_task.work(0);
   }
 
@@ -5448,13 +5423,6 @@
 
   finalize_for_evac_failure();
 
-  // Must do this before clearing the per-region evac-failure flags
-  // (which is currently done when we free the collection set).
-  // We also only do this if marking is actually in progress and so
-  // have to do this before we set the mark_in_progress flag at the
-  // end of an initial mark pause.
-  concurrent_mark()->complete_marking_in_collection_set();
-
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
     if (PrintGCDetails) {
@@ -5590,7 +5558,7 @@
     AbstractGangTask("G1 Par Cleanup CT Task"),
     _ct_bs(ct_bs), _g1h(g1h) { }
 
-  void work(int i) {
+  void work(uint worker_id) {
     HeapRegion* r;
     while (r = _g1h->pop_dirty_cards_region()) {
       clear_cards(r);
@@ -5663,8 +5631,8 @@
     // Iterate over the dirty cards region list.
     G1ParCleanupCTTask cleanup_task(ct_bs, this);
 
-    if (ParallelGCThreads > 0) {
-      set_par_threads(workers()->total_workers());
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      set_par_threads();
       workers()->run_task(&cleanup_task);
       set_par_threads(0);
     } else {
@@ -5767,16 +5735,6 @@
 
       // And the region is empty.
       assert(!used_mr.is_empty(), "Should not have empty regions in a CS.");
-
-      // If marking is in progress then clear any objects marked in
-      // the current region. Note mark_in_progress() returns false,
-      // even during an initial mark pause, until the set_marking_started()
-      // call which takes place later in the pause.
-      if (mark_in_progress()) {
-        assert(!g1_policy()->during_initial_mark_pause(), "sanity");
-        _cm->nextMarkBitMap()->clearRange(used_mr);
-      }
-
       free_region(cur, &pre_used, &local_free_list, false /* par */);
     } else {
       cur->uninstall_surv_rate_group();
@@ -5843,8 +5801,9 @@
 }
 
 void G1CollectedHeap::reset_free_regions_coming() {
+  assert(free_regions_coming(), "pre-condition");
+
   {
-    assert(free_regions_coming(), "pre-condition");
     MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
     _free_regions_coming = false;
     SecondaryFreeList_lock->notify_all();
@@ -6072,8 +6031,9 @@
 void G1CollectedHeap::set_par_threads() {
   // Don't change the number of workers.  Use the value previously set
   // in the workgroup.
-  int n_workers = workers()->active_workers();
-    assert(UseDynamicNumberOfGCThreads ||
+  assert(G1CollectedHeap::use_parallel_gc_threads(), "shouldn't be here otherwise");
+  uint n_workers = workers()->active_workers();
+  assert(UseDynamicNumberOfGCThreads ||
            n_workers == workers()->total_workers(),
       "Otherwise should be using the total number of workers");
   if (n_workers == 0) {
@@ -6110,6 +6070,8 @@
       } else {
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Old);
       }
+      bool during_im = g1_policy()->during_initial_mark_pause();
+      new_alloc_region->note_start_of_copying(during_im);
       return new_alloc_region;
     } else {
       g1_policy()->note_alloc_region_limit_reached(ap);
@@ -6121,7 +6083,8 @@
 void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region,
                                              size_t allocated_bytes,
                                              GCAllocPurpose ap) {
-  alloc_region->note_end_of_copying();
+  bool during_im = g1_policy()->during_initial_mark_pause();
+  alloc_region->note_end_of_copying(during_im);
   g1_policy()->record_bytes_copied_during_gc(allocated_bytes);
   if (ap == GCAllocForSurvived) {
     young_list()->add_survivor_region(alloc_region);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -285,6 +285,14 @@
   // Typically, it is not full so we should re-use it during the next GC.
   HeapRegion* _retained_old_gc_alloc_region;
 
+  // It specifies whether we should attempt to expand the heap after a
+  // region allocation failure. If heap expansion fails we set this to
+  // false so that we don't re-attempt the heap expansion (it's likely
+  // that subsequent expansion attempts will also fail if one fails).
+  // Currently, it is only consulted during GC and it's reset at the
+  // start of each GC.
+  bool _expand_heap_after_alloc_failure;
+
   // It resets the mutator alloc region before new allocations can take place.
   void init_mutator_alloc_region();
 
@@ -347,6 +355,7 @@
   // explicitly started if:
   // (a) cause == _gc_locker and +GCLockerInvokesConcurrent, or
   // (b) cause == _java_lang_system_gc and +ExplicitGCInvokesConcurrent.
+  // (c) cause == _g1_humongous_allocation
   bool should_do_concurrent_full_gc(GCCause::Cause cause);
 
   // Keeps track of how many "full collections" (i.e., Full GCs or
@@ -761,7 +770,7 @@
   // the "i" of the calling parallel worker thread's work(i) function.
   // In the sequential case this param will be ignored.
   void g1_process_strong_roots(bool collecting_perm_gen,
-                               SharedHeap::ScanningOption so,
+                               ScanningOption so,
                                OopClosure* scan_non_heap_roots,
                                OopsInHeapRegionClosure* scan_rs,
                                OopsInGenClosure* scan_perm,
@@ -861,8 +870,7 @@
   void finalize_for_evac_failure();
 
   // An attempt to evacuate "obj" has failed; take necessary steps.
-  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj,
-                                    bool should_mark_root);
+  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj);
   void handle_evacuation_failure_common(oop obj, markOop m);
 
   // ("Weak") Reference processing support.
@@ -943,8 +951,18 @@
   // discovery.
   G1CMIsAliveClosure _is_alive_closure_cm;
 
+  // Cache used by G1CollectedHeap::start_cset_region_for_worker().
+  HeapRegion** _worker_cset_start_region;
+
+  // Time stamp to validate the regions recorded in the cache
+  // used by G1CollectedHeap::start_cset_region_for_worker().
+  // The heap region entry for a given worker is valid iff
+  // the associated time stamp value matches the current value
+  // of G1CollectedHeap::_gc_time_stamp.
+  unsigned int* _worker_cset_start_region_time_stamp;
+
   enum G1H_process_strong_roots_tasks {
-    G1H_PS_mark_stack_oops_do,
+    G1H_PS_filter_satb_buffers,
     G1H_PS_refProcessor_oops_do,
     // Leave this one last.
     G1H_PS_NumElements
@@ -985,7 +1003,7 @@
   // Initialize weak reference processing.
   virtual void ref_processing_init();
 
-  void set_par_threads(int t) {
+  void set_par_threads(uint t) {
     SharedHeap::set_par_threads(t);
     // Done in SharedHeap but oddly there are
     // two _process_strong_tasks's in a G1CollectedHeap
@@ -1030,6 +1048,9 @@
   void reset_gc_time_stamp() {
     _gc_time_stamp = 0;
     OrderAccess::fence();
+    // Clear the cached CSet starting regions and time stamps.
+    // Their validity is dependent on the GC timestamp.
+    clear_cset_start_regions();
   }
 
   void increment_gc_time_stamp() {
@@ -1152,11 +1173,21 @@
     _old_set.remove(hr);
   }
 
+  size_t non_young_capacity_bytes() {
+    return _old_set.total_capacity_bytes() + _humongous_set.total_capacity_bytes();
+  }
+
   void set_free_regions_coming();
   void reset_free_regions_coming();
   bool free_regions_coming() { return _free_regions_coming; }
   void wait_while_free_regions_coming();
 
+  // Determine whether the given region is one that we are using as an
+  // old GC alloc region.
+  bool is_old_gc_alloc_region(HeapRegion* hr) {
+    return hr == _retained_old_gc_alloc_region;
+  }
+
   // Perform a collection of the heap; intended for use in implementing
   // "System.gc".  This probably implies as full a collection as the
   // "CollectedHeap" supports.
@@ -1196,7 +1227,7 @@
                                        HumongousRegionSet* humongous_proxy_set,
                                        bool par);
 
-  // Returns "TRUE" iff "p" points into the allocated area of the heap.
+  // Returns "TRUE" iff "p" points into the committed areas of the heap.
   virtual bool is_in(const void* p) const;
 
   // Return "TRUE" iff the given object address is within the collection
@@ -1285,13 +1316,17 @@
   // chunk.)  For now requires that "doHeapRegion" always returns "false",
   // i.e., that a closure never attempt to abort a traversal.
   void heap_region_par_iterate_chunked(HeapRegionClosure* blk,
-                                       int worker,
-                                       int no_of_par_workers,
+                                       uint worker,
+                                       uint no_of_par_workers,
                                        jint claim_value);
 
   // It resets all the region claim values to the default.
   void reset_heap_region_claim_values();
 
+  // Resets the claim values of regions in the current
+  // collection set to the default.
+  void reset_cset_heap_region_claim_values();
+
 #ifdef ASSERT
   bool check_heap_region_claim_values(jint claim_value);
 
@@ -1300,9 +1335,12 @@
   bool check_cset_heap_region_claim_values(jint claim_value);
 #endif // ASSERT
 
-  // Given the id of a worker, calculate a suitable
-  // starting region for iterating over the current
-  // collection set.
+  // Clear the cached cset start regions and (more importantly)
+  // the time stamps. Called when we reset the GC time stamp.
+  void clear_cset_start_regions();
+
+  // Given the id of a worker, obtain or calculate a suitable
+  // starting region for iterating over the current collection set.
   HeapRegion* start_cset_region_for_worker(int worker_i);
 
   // Iterate over the regions (if any) in the current collection set.
@@ -1630,8 +1668,6 @@
 public:
   void stop_conc_gc_threads();
 
-  double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
-  void check_if_region_is_too_expensive(double predicted_time_ms);
   size_t pending_card_num();
   size_t max_pending_card_num();
   size_t cards_scanned();
@@ -1724,10 +1760,8 @@
       _gclab_word_size(gclab_word_size),
       _real_start_word(NULL),
       _real_end_word(NULL),
-      _start_word(NULL)
-  {
-    guarantee( size_in_words() >= bitmap_size_in_words(),
-               "just making sure");
+      _start_word(NULL) {
+    guarantee(false, "GCLabBitMap::GCLabBitmap(): don't call this any more");
   }
 
   inline unsigned heapWordToOffset(HeapWord* addr) {
@@ -1781,6 +1815,8 @@
   }
 
   void set_buffer(HeapWord* start) {
+    guarantee(false, "set_buffer(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     clear();
 
@@ -1804,6 +1840,8 @@
 #endif // PRODUCT
 
   void retire() {
+    guarantee(false, "retire(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     assert(fields_well_formed(), "invariant");
 
@@ -1837,32 +1875,18 @@
 class G1ParGCAllocBuffer: public ParGCAllocBuffer {
 private:
   bool        _retired;
-  bool        _should_mark_objects;
-  GCLabBitMap _bitmap;
 
 public:
   G1ParGCAllocBuffer(size_t gclab_word_size);
 
-  inline bool mark(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(_should_mark_objects, "invariant");
-    return _bitmap.mark(addr);
-  }
-
-  inline void set_buf(HeapWord* buf) {
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.set_buffer(buf);
-    }
+  void set_buf(HeapWord* buf) {
     ParGCAllocBuffer::set_buf(buf);
     _retired = false;
   }
 
-  inline void retire(bool end_of_gc, bool retain) {
+  void retire(bool end_of_gc, bool retain) {
     if (_retired)
       return;
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.retire();
-    }
     ParGCAllocBuffer::retire(end_of_gc, retain);
     _retired = true;
   }
@@ -1889,7 +1913,7 @@
   G1ParScanPartialArrayClosure* _partial_scan_cl;
 
   int _hash_seed;
-  int _queue_num;
+  uint _queue_num;
 
   size_t _term_attempts;
 
@@ -1933,7 +1957,7 @@
   }
 
 public:
-  G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num);
+  G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num);
 
   ~G1ParScanThreadState() {
     FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
@@ -2025,7 +2049,7 @@
   }
 
   int* hash_seed() { return &_hash_seed; }
-  int  queue_num() { return _queue_num; }
+  uint queue_num() { return _queue_num; }
 
   size_t term_attempts() const  { return _term_attempts; }
   void note_term_attempt() { _term_attempts++; }
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,7 +50,7 @@
 };
 
 // all the same
-static double fully_young_cards_per_entry_ratio_defaults[] = {
+static double young_cards_per_entry_ratio_defaults[] = {
   1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
 };
 
@@ -136,12 +136,12 @@
   _stop_world_start(0.0),
   _all_stop_world_times_ms(new NumberSeq()),
   _all_yield_times_ms(new NumberSeq()),
-  _using_new_ratio_calculations(false),
 
   _summary(new Summary()),
 
   _cur_clear_ct_time_ms(0.0),
   _mark_closure_time_ms(0.0),
+  _root_region_scan_wait_time_ms(0.0),
 
   _cur_ref_proc_time_ms(0.0),
   _cur_ref_enq_time_ms(0.0),
@@ -168,11 +168,10 @@
   _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
   _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-  _fully_young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
-  _partially_young_cards_per_entry_ratio_seq(
-                                         new TruncatedSeq(TruncatedSeqLength)),
+  _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-  _partially_young_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _mixed_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_byte_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)),
   _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -185,9 +184,9 @@
 
   _pause_time_target_ms((double) MaxGCPauseMillis),
 
-  _full_young_gcs(true),
-  _full_young_pause_num(0),
-  _partial_young_pause_num(0),
+  _gcs_are_young(true),
+  _young_pause_num(0),
+  _mixed_pause_num(0),
 
   _during_marking(false),
   _in_marking_window(false),
@@ -198,7 +197,8 @@
 
   _young_gc_eff_seq(new TruncatedSeq(TruncatedSeqLength)),
 
-   _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_prev_end_times_for_all_gcs_sec(
+                                new TruncatedSeq(NumPrevPausesForHeuristics)),
 
   _recent_avg_pause_time_ratio(0.0),
 
@@ -206,15 +206,13 @@
 
   _initiate_conc_mark_if_possible(false),
   _during_initial_mark_pause(false),
-  _should_revert_to_full_young_gcs(false),
-  _last_full_young_gc(false),
+  _last_young_gc(false),
+  _last_gc_was_young(false),
 
   _eden_bytes_before_gc(0),
   _survivor_bytes_before_gc(0),
   _capacity_before_gc(0),
 
-  _prev_collection_pause_used_at_end_bytes(0),
-
   _eden_cset_region_length(0),
   _survivor_cset_region_length(0),
   _old_cset_region_length(0),
@@ -229,7 +227,9 @@
   _inc_cset_bytes_used_before(0),
   _inc_cset_max_finger(NULL),
   _inc_cset_recorded_rs_lengths(0),
+  _inc_cset_recorded_rs_lengths_diffs(0),
   _inc_cset_predicted_elapsed_time_ms(0.0),
+  _inc_cset_predicted_elapsed_time_ms_diffs(0.0),
 
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
@@ -279,7 +279,7 @@
 
   _par_last_gc_worker_start_times_ms = new double[_parallel_gc_threads];
   _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads];
-  _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads];
+  _par_last_satb_filtering_times_ms = new double[_parallel_gc_threads];
 
   _par_last_update_rs_times_ms = new double[_parallel_gc_threads];
   _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
@@ -294,9 +294,6 @@
   _par_last_gc_worker_times_ms = new double[_parallel_gc_threads];
   _par_last_gc_worker_other_times_ms = new double[_parallel_gc_threads];
 
-  // start conservatively
-  _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis;
-
   int index;
   if (ParallelGCThreads == 0)
     index = 0;
@@ -308,8 +305,8 @@
   _pending_card_diff_seq->add(0.0);
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
   _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
-  _fully_young_cards_per_entry_ratio_seq->add(
-                            fully_young_cards_per_entry_ratio_defaults[index]);
+  _young_cards_per_entry_ratio_seq->add(
+                                  young_cards_per_entry_ratio_defaults[index]);
   _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]);
   _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
   _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
@@ -406,11 +403,7 @@
 
   initialize_all();
   _collectionSetChooser = new CollectionSetChooser();
-}
-
-// Increment "i", mod "len"
-static void inc_mod(int& i, int len) {
-  i++; if (i == len) i = 0;
+  _young_gen_sizer = new G1YoungGenSizer(); // Must be after call to initialize_flags
 }
 
 void G1CollectorPolicy::initialize_flags() {
@@ -422,39 +415,74 @@
   CollectorPolicy::initialize_flags();
 }
 
-// The easiest way to deal with the parsing of the NewSize /
-// MaxNewSize / etc. parameteres is to re-use the code in the
-// TwoGenerationCollectorPolicy class. This is similar to what
-// ParallelScavenge does with its GenerationSizer class (see
-// ParallelScavengeHeap::initialize()). We might change this in the
-// future, but it's a good start.
-class G1YoungGenSizer : public TwoGenerationCollectorPolicy {
-private:
-  size_t size_to_region_num(size_t byte_size) {
-    return MAX2((size_t) 1, byte_size / HeapRegion::GrainBytes);
+G1YoungGenSizer::G1YoungGenSizer() : _sizer_kind(SizerDefaults), _adaptive_size(true) {
+  assert(G1DefaultMinNewGenPercent <= G1DefaultMaxNewGenPercent, "Min larger than max");
+  assert(G1DefaultMinNewGenPercent > 0 && G1DefaultMinNewGenPercent < 100, "Min out of bounds");
+  assert(G1DefaultMaxNewGenPercent > 0 && G1DefaultMaxNewGenPercent < 100, "Max out of bounds");
+
+  if (FLAG_IS_CMDLINE(NewRatio)) {
+    if (FLAG_IS_CMDLINE(NewSize) || FLAG_IS_CMDLINE(MaxNewSize)) {
+      warning("-XX:NewSize and -XX:MaxNewSize override -XX:NewRatio");
+    } else {
+      _sizer_kind = SizerNewRatio;
+      _adaptive_size = false;
+      return;
+    }
   }
 
-public:
-  G1YoungGenSizer() {
-    initialize_flags();
-    initialize_size_info();
+  if (FLAG_IS_CMDLINE(NewSize)) {
+     _min_desired_young_length = MAX2((size_t) 1, NewSize / HeapRegion::GrainBytes);
+    if (FLAG_IS_CMDLINE(MaxNewSize)) {
+      _max_desired_young_length = MAX2((size_t) 1, MaxNewSize / HeapRegion::GrainBytes);
+      _sizer_kind = SizerMaxAndNewSize;
+      _adaptive_size = _min_desired_young_length == _max_desired_young_length;
+    } else {
+      _sizer_kind = SizerNewSizeOnly;
+    }
+  } else if (FLAG_IS_CMDLINE(MaxNewSize)) {
+    _max_desired_young_length = MAX2((size_t) 1, MaxNewSize / HeapRegion::GrainBytes);
+    _sizer_kind = SizerMaxNewSizeOnly;
   }
-  size_t min_young_region_num() {
-    return size_to_region_num(_min_gen0_size);
-  }
-  size_t initial_young_region_num() {
-    return size_to_region_num(_initial_gen0_size);
+}
+
+size_t G1YoungGenSizer::calculate_default_min_length(size_t new_number_of_heap_regions) {
+  size_t default_value = (new_number_of_heap_regions * G1DefaultMinNewGenPercent) / 100;
+  return MAX2((size_t)1, default_value);
+}
+
+size_t G1YoungGenSizer::calculate_default_max_length(size_t new_number_of_heap_regions) {
+  size_t default_value = (new_number_of_heap_regions * G1DefaultMaxNewGenPercent) / 100;
+  return MAX2((size_t)1, default_value);
+}
+
+void G1YoungGenSizer::heap_size_changed(size_t new_number_of_heap_regions) {
+  assert(new_number_of_heap_regions > 0, "Heap must be initialized");
+
+  switch (_sizer_kind) {
+    case SizerDefaults:
+      _min_desired_young_length = calculate_default_min_length(new_number_of_heap_regions);
+      _max_desired_young_length = calculate_default_max_length(new_number_of_heap_regions);
+      break;
+    case SizerNewSizeOnly:
+      _max_desired_young_length = calculate_default_max_length(new_number_of_heap_regions);
+      _max_desired_young_length = MAX2(_min_desired_young_length, _max_desired_young_length);
+      break;
+    case SizerMaxNewSizeOnly:
+      _min_desired_young_length = calculate_default_min_length(new_number_of_heap_regions);
+      _min_desired_young_length = MIN2(_min_desired_young_length, _max_desired_young_length);
+      break;
+    case SizerMaxAndNewSize:
+      // Do nothing. Both values were set on the command line, so don't update them at runtime.
+      break;
+    case SizerNewRatio:
+      _min_desired_young_length = new_number_of_heap_regions / (NewRatio + 1);
+      _max_desired_young_length = _min_desired_young_length;
+      break;
+    default:
+      ShouldNotReachHere();
   }
-  size_t max_young_region_num() {
-    return size_to_region_num(_max_gen0_size);
-  }
-};
-
-void G1CollectorPolicy::update_young_list_size_using_newratio(size_t number_of_heap_regions) {
-  assert(number_of_heap_regions > 0, "Heap must be initialized");
-  size_t young_size = number_of_heap_regions / (NewRatio + 1);
-  _min_desired_young_length = young_size;
-  _max_desired_young_length = young_size;
+
+  assert(_min_desired_young_length <= _max_desired_young_length, "Invalid min/max young gen size values");
 }
 
 void G1CollectorPolicy::init() {
@@ -465,28 +493,10 @@
 
   initialize_gc_policy_counters();
 
-  G1YoungGenSizer sizer;
-  _min_desired_young_length = sizer.min_young_region_num();
-  _max_desired_young_length = sizer.max_young_region_num();
-
-  if (FLAG_IS_CMDLINE(NewRatio)) {
-    if (FLAG_IS_CMDLINE(NewSize) || FLAG_IS_CMDLINE(MaxNewSize)) {
-      warning("-XX:NewSize and -XX:MaxNewSize override -XX:NewRatio");
-    } else {
-      // Treat NewRatio as a fixed size that is only recalculated when the heap size changes
-      update_young_list_size_using_newratio(_g1->n_regions());
-      _using_new_ratio_calculations = true;
-    }
-  }
-
-  assert(_min_desired_young_length <= _max_desired_young_length, "Invalid min/max young gen size values");
-
-  set_adaptive_young_list_length(_min_desired_young_length < _max_desired_young_length);
   if (adaptive_young_list_length()) {
     _young_list_fixed_length = 0;
   } else {
-    assert(_min_desired_young_length == _max_desired_young_length, "Min and max young size differ");
-    _young_list_fixed_length = _min_desired_young_length;
+    _young_list_fixed_length = _young_gen_sizer->min_desired_young_length();
   }
   _free_regions_at_end_of_collection = _g1->free_regions();
   update_young_list_target_length();
@@ -540,11 +550,7 @@
   // smaller than 1.0) we'll get 1.
   _reserve_regions = (size_t) ceil(reserve_regions_d);
 
-  if (_using_new_ratio_calculations) {
-    // -XX:NewRatio was specified so we need to update the
-    // young gen length when the heap size has changed.
-    update_young_list_size_using_newratio(new_number_of_regions);
-  }
+  _young_gen_sizer->heap_size_changed(new_number_of_regions);
 }
 
 size_t G1CollectorPolicy::calculate_young_list_desired_min_length(
@@ -562,14 +568,14 @@
   }
   desired_min_length += base_min_length;
   // make sure we don't go below any user-defined minimum bound
-  return MAX2(_min_desired_young_length, desired_min_length);
+  return MAX2(_young_gen_sizer->min_desired_young_length(), desired_min_length);
 }
 
 size_t G1CollectorPolicy::calculate_young_list_desired_max_length() {
   // Here, we might want to also take into account any additional
   // constraints (i.e., user-defined minimum bound). Currently, we
   // effectively don't set this bound.
-  return _max_desired_young_length;
+  return _young_gen_sizer->max_desired_young_length();
 }
 
 void G1CollectorPolicy::update_young_list_target_length(size_t rs_lengths) {
@@ -606,7 +612,7 @@
 
   size_t young_list_target_length = 0;
   if (adaptive_young_list_length()) {
-    if (full_young_gcs()) {
+    if (gcs_are_young()) {
       young_list_target_length =
                         calculate_young_list_target_length(rs_lengths,
                                                            base_min_length,
@@ -619,16 +625,9 @@
       // possible to maximize how many old regions we can add to it.
     }
   } else {
-    if (full_young_gcs()) {
-      young_list_target_length = _young_list_fixed_length;
-    } else {
-      // A bit arbitrary: during partially-young GCs we allocate half
-      // the young regions to try to add old regions to the CSet.
-      young_list_target_length = _young_list_fixed_length / 2;
-      // We choose to accept that we might go under the desired min
-      // length given that we intentionally ask for a smaller young gen.
-      desired_min_length = absolute_min_length;
-    }
+    // The user asked for a fixed young gen so we'll fix the young gen
+    // whether the next GC is young or mixed.
+    young_list_target_length = _young_list_fixed_length;
   }
 
   // Make sure we don't go over the desired max length, nor under the
@@ -655,7 +654,7 @@
                                                    size_t desired_min_length,
                                                    size_t desired_max_length) {
   assert(adaptive_young_list_length(), "pre-condition");
-  assert(full_young_gcs(), "only call this for fully-young GCs");
+  assert(gcs_are_young(), "only call this for young GCs");
 
   // In case some edge-condition makes the desired max length too small...
   if (desired_max_length <= desired_min_length) {
@@ -858,12 +857,10 @@
 
   _g1->clear_full_collection();
 
-  // "Nuke" the heuristics that control the fully/partially young GC
-  // transitions and make sure we start with fully young GCs after the
-  // Full GC.
-  set_full_young_gcs(true);
-  _last_full_young_gc = false;
-  _should_revert_to_full_young_gcs = false;
+  // "Nuke" the heuristics that control the young/mixed GC
+  // transitions and make sure we start with young GCs after the Full GC.
+  set_gcs_are_young(true);
+  _last_young_gc = false;
   clear_initiate_conc_mark_if_possible();
   clear_during_initial_mark_pause();
   _known_garbage_bytes = 0;
@@ -880,7 +877,7 @@
   // Reset survivors SurvRateGroup.
   _survivor_surv_rate_group->reset();
   update_young_list_target_length();
-  _collectionSetChooser->updateAfterFullCollection();
+  _collectionSetChooser->clearMarkedHeapRegions();
 }
 
 void G1CollectorPolicy::record_stop_world_start() {
@@ -892,7 +889,7 @@
   if (PrintGCDetails) {
     gclog_or_tty->stamp(PrintGCTimeStamps);
     gclog_or_tty->print("[GC pause");
-    gclog_or_tty->print(" (%s)", full_young_gcs() ? "young" : "partial");
+    gclog_or_tty->print(" (%s)", gcs_are_young() ? "young" : "mixed");
   }
 
   // We only need to do this here as the policy will only be applied
@@ -929,7 +926,7 @@
   for (int i = 0; i < _parallel_gc_threads; ++i) {
     _par_last_gc_worker_start_times_ms[i] = -1234.0;
     _par_last_ext_root_scan_times_ms[i] = -1234.0;
-    _par_last_mark_stack_scan_times_ms[i] = -1234.0;
+    _par_last_satb_filtering_times_ms[i] = -1234.0;
     _par_last_update_rs_times_ms[i] = -1234.0;
     _par_last_update_rs_processed_buffers[i] = -1234.0;
     _par_last_scan_rs_times_ms[i] = -1234.0;
@@ -950,8 +947,11 @@
   // This is initialized to zero here and is set during
   // the evacuation pause if marking is in progress.
   _cur_satb_drain_time_ms = 0.0;
-
-  _last_young_gc_full = false;
+  // This is initialized to zero here and is set during the evacuation
+  // pause if we actually waited for the root region scanning to finish.
+  _root_region_scan_wait_time_ms = 0.0;
+
+  _last_gc_was_young = false;
 
   // do that for any other surv rate groups
   _short_lived_surv_rate_group->stop_adding_regions();
@@ -988,8 +988,7 @@
 }
 
 void G1CollectorPolicy::record_concurrent_mark_cleanup_completed() {
-  _should_revert_to_full_young_gcs = false;
-  _last_full_young_gc = true;
+  _last_young_gc = true;
   _in_marking_window = false;
 }
 
@@ -1121,6 +1120,50 @@
   return ret;
 }
 
+bool G1CollectorPolicy::need_to_start_conc_mark(const char* source, size_t alloc_word_size) {
+  if (_g1->concurrent_mark()->cmThread()->during_cycle()) {
+    return false;
+  }
+
+  size_t marking_initiating_used_threshold =
+    (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent;
+  size_t cur_used_bytes = _g1->non_young_capacity_bytes();
+  size_t alloc_byte_size = alloc_word_size * HeapWordSize;
+
+  if ((cur_used_bytes + alloc_byte_size) > marking_initiating_used_threshold) {
+    if (gcs_are_young()) {
+      ergo_verbose5(ErgoConcCycles,
+        "request concurrent cycle initiation",
+        ergo_format_reason("occupancy higher than threshold")
+        ergo_format_byte("occupancy")
+        ergo_format_byte("allocation request")
+        ergo_format_byte_perc("threshold")
+        ergo_format_str("source"),
+        cur_used_bytes,
+        alloc_byte_size,
+        marking_initiating_used_threshold,
+        (double) InitiatingHeapOccupancyPercent,
+        source);
+      return true;
+    } else {
+      ergo_verbose5(ErgoConcCycles,
+        "do not request concurrent cycle initiation",
+        ergo_format_reason("still doing mixed collections")
+        ergo_format_byte("occupancy")
+        ergo_format_byte("allocation request")
+        ergo_format_byte_perc("threshold")
+        ergo_format_str("source"),
+        cur_used_bytes,
+        alloc_byte_size,
+        marking_initiating_used_threshold,
+        (double) InitiatingHeapOccupancyPercent,
+        source);
+    }
+  }
+
+  return false;
+}
+
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
@@ -1147,45 +1190,15 @@
 #endif // PRODUCT
 
   last_pause_included_initial_mark = during_initial_mark_pause();
-  if (last_pause_included_initial_mark)
+  if (last_pause_included_initial_mark) {
     record_concurrent_mark_init_end(0.0);
-
-  size_t marking_initiating_used_threshold =
-    (_g1->capacity() / 100) * InitiatingHeapOccupancyPercent;
-
-  if (!_g1->mark_in_progress() && !_last_full_young_gc) {
-    assert(!last_pause_included_initial_mark, "invariant");
-    if (cur_used_bytes > marking_initiating_used_threshold) {
-      if (cur_used_bytes > _prev_collection_pause_used_at_end_bytes) {
-        assert(!during_initial_mark_pause(), "we should not see this here");
-
-        ergo_verbose3(ErgoConcCycles,
-                      "request concurrent cycle initiation",
-                      ergo_format_reason("occupancy higher than threshold")
-                      ergo_format_byte("occupancy")
-                      ergo_format_byte_perc("threshold"),
-                      cur_used_bytes,
-                      marking_initiating_used_threshold,
-                      (double) InitiatingHeapOccupancyPercent);
-
-        // Note: this might have already been set, if during the last
-        // pause we decided to start a cycle but at the beginning of
-        // this pause we decided to postpone it. That's OK.
-        set_initiate_conc_mark_if_possible();
-      } else {
-        ergo_verbose2(ErgoConcCycles,
-                  "do not request concurrent cycle initiation",
-                  ergo_format_reason("occupancy lower than previous occupancy")
-                  ergo_format_byte("occupancy")
-                  ergo_format_byte("previous occupancy"),
-                  cur_used_bytes,
-                  _prev_collection_pause_used_at_end_bytes);
-      }
-    }
+  } else if (!_last_young_gc && need_to_start_conc_mark("end of GC")) {
+    // Note: this might have already been set, if during the last
+    // pause we decided to start a cycle but at the beginning of
+    // this pause we decided to postpone it. That's OK.
+    set_initiate_conc_mark_if_possible();
   }
 
-  _prev_collection_pause_used_at_end_bytes = cur_used_bytes;
-
   _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0,
                           end_time_sec, false);
 
@@ -1217,7 +1230,7 @@
   // of the PrintGCDetails output, in the non-parallel case.
 
   double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
-  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
+  double satb_filtering_time = avg_value(_par_last_satb_filtering_times_ms);
   double update_rs_time = avg_value(_par_last_update_rs_times_ms);
   double update_rs_processed_buffers =
     sum_of_values(_par_last_update_rs_processed_buffers);
@@ -1226,7 +1239,7 @@
   double termination_time = avg_value(_par_last_termination_times_ms);
 
   double known_time = ext_root_scan_time +
-                      mark_stack_scan_time +
+                      satb_filtering_time +
                       update_rs_time +
                       scan_rs_time +
                       obj_copy_time;
@@ -1238,6 +1251,10 @@
   // is in progress.
   other_time_ms -= _cur_satb_drain_time_ms;
 
+  // Subtract the root region scanning wait time. It's initialized to
+  // zero at the start of the pause.
+  other_time_ms -= _root_region_scan_wait_time_ms;
+
   if (parallel) {
     other_time_ms -= _cur_collection_par_time_ms;
   } else {
@@ -1270,9 +1287,11 @@
     // each other. Therefore we unconditionally record the SATB drain
     // time - even if it's zero.
     body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
+    body_summary->record_root_region_scan_wait_time_ms(
+                                               _root_region_scan_wait_time_ms);
 
     body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
-    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
+    body_summary->record_satb_filtering_time_ms(satb_filtering_time);
     body_summary->record_update_rs_time_ms(update_rs_time);
     body_summary->record_scan_rs_time_ms(scan_rs_time);
     body_summary->record_obj_copy_time_ms(obj_copy_time);
@@ -1366,16 +1385,15 @@
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
-    if (print_marking_info) {
-      print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
+    if (_root_region_scan_wait_time_ms > 0.0) {
+      print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
     }
-
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
       print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
       print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
       if (print_marking_info) {
-        print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
+        print_par_stats(2, "SATB Filtering", _par_last_satb_filtering_times_ms);
       }
       print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
       print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
@@ -1389,7 +1407,7 @@
         _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
 
         double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
-                                   _par_last_mark_stack_scan_times_ms[i] +
+                                   _par_last_satb_filtering_times_ms[i] +
                                    _par_last_update_rs_times_ms[i] +
                                    _par_last_scan_rs_times_ms[i] +
                                    _par_last_obj_copy_times_ms[i] +
@@ -1402,7 +1420,7 @@
     } else {
       print_stats(1, "Ext Root Scanning", ext_root_scan_time);
       if (print_marking_info) {
-        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+        print_stats(1, "SATB Filtering", satb_filtering_time);
       }
       print_stats(1, "Update RS", update_rs_time);
       print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
@@ -1458,57 +1476,34 @@
     new_in_marking_window_im = true;
   }
 
-  if (_last_full_young_gc) {
+  if (_last_young_gc) {
+    // This is supposed to be the "last young GC" before we start
+    // doing mixed GCs. Here we decide whether to start mixed GCs or not.
+
     if (!last_pause_included_initial_mark) {
-      ergo_verbose2(ErgoPartiallyYoungGCs,
-                    "start partially-young GCs",
-                    ergo_format_byte_perc("known garbage"),
-                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
-      set_full_young_gcs(false);
+      if (next_gc_should_be_mixed("start mixed GCs",
+                                  "do not start mixed GCs")) {
+        set_gcs_are_young(false);
+      }
     } else {
-      ergo_verbose0(ErgoPartiallyYoungGCs,
-                    "do not start partially-young GCs",
+      ergo_verbose0(ErgoMixedGCs,
+                    "do not start mixed GCs",
                     ergo_format_reason("concurrent cycle is about to start"));
     }
-    _last_full_young_gc = false;
+    _last_young_gc = false;
   }
 
-  if ( !_last_young_gc_full ) {
-    if (_should_revert_to_full_young_gcs) {
-      ergo_verbose2(ErgoPartiallyYoungGCs,
-                    "end partially-young GCs",
-                    ergo_format_reason("partially-young GCs end requested")
-                    ergo_format_byte_perc("known garbage"),
-                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
-      set_full_young_gcs(true);
-    } else if (_known_garbage_ratio < 0.05) {
-      ergo_verbose3(ErgoPartiallyYoungGCs,
-               "end partially-young GCs",
-               ergo_format_reason("known garbage percent lower than threshold")
-               ergo_format_byte_perc("known garbage")
-               ergo_format_perc("threshold"),
-               _known_garbage_bytes, _known_garbage_ratio * 100.0,
-               0.05 * 100.0);
-      set_full_young_gcs(true);
-    } else if (adaptive_young_list_length() &&
-              (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) {
-      ergo_verbose5(ErgoPartiallyYoungGCs,
-                    "end partially-young GCs",
-                    ergo_format_reason("current GC efficiency lower than "
-                                       "predicted fully-young GC efficiency")
-                    ergo_format_double("GC efficiency factor")
-                    ergo_format_double("current GC efficiency")
-                    ergo_format_double("predicted fully-young GC efficiency")
-                    ergo_format_byte_perc("known garbage"),
-                    get_gc_eff_factor(), cur_efficiency,
-                    predict_young_gc_eff(),
-                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
-      set_full_young_gcs(true);
+  if (!_last_gc_was_young) {
+    // This is a mixed GC. Here we decide whether to continue doing
+    // mixed GCs or not.
+
+    if (!next_gc_should_be_mixed("continue mixed GCs",
+                                 "do not continue mixed GCs")) {
+      set_gcs_are_young(true);
     }
   }
-  _should_revert_to_full_young_gcs = false;
-
-  if (_last_young_gc_full && !_during_marking) {
+
+  if (_last_gc_was_young && !_during_marking) {
     _young_gc_eff_seq->add(cur_efficiency);
   }
 
@@ -1534,25 +1529,36 @@
     double cost_per_entry_ms = 0.0;
     if (cards_scanned > 10) {
       cost_per_entry_ms = scan_rs_time / (double) cards_scanned;
-      if (_last_young_gc_full)
+      if (_last_gc_was_young) {
         _cost_per_entry_ms_seq->add(cost_per_entry_ms);
-      else
-        _partially_young_cost_per_entry_ms_seq->add(cost_per_entry_ms);
+      } else {
+        _mixed_cost_per_entry_ms_seq->add(cost_per_entry_ms);
+      }
     }
 
     if (_max_rs_lengths > 0) {
       double cards_per_entry_ratio =
         (double) cards_scanned / (double) _max_rs_lengths;
-      if (_last_young_gc_full)
-        _fully_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
-      else
-        _partially_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+      if (_last_gc_was_young) {
+        _young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+      } else {
+        _mixed_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+      }
     }
 
-    // It turns out that, sometimes, _max_rs_lengths can get smaller
-    // than _recorded_rs_lengths which causes rs_length_diff to get
-    // very large and mess up the RSet length predictions. We'll be
-    // defensive until we work out why this happens.
+    // This is defensive. For a while _max_rs_lengths could get
+    // smaller than _recorded_rs_lengths which was causing
+    // rs_length_diff to get very large and mess up the RSet length
+    // predictions. The reason was unsafe concurrent updates to the
+    // _inc_cset_recorded_rs_lengths field which the code below guards
+    // against (see CR 7118202). This bug has now been fixed (see CR
+    // 7119027). However, I'm still worried that
+    // _inc_cset_recorded_rs_lengths might still end up somewhat
+    // inaccurate. The concurrent refinement thread calculates an
+    // RSet's length concurrently with other CR threads updating it
+    // which might cause it to calculate the length incorrectly (if,
+    // say, it's in mid-coarsening). So I'll leave in the defensive
+    // conditional below just in case.
     size_t rs_length_diff = 0;
     if (_max_rs_lengths > _recorded_rs_lengths) {
       rs_length_diff = _max_rs_lengths - _recorded_rs_lengths;
@@ -1563,10 +1569,11 @@
     double cost_per_byte_ms = 0.0;
     if (copied_bytes > 0) {
       cost_per_byte_ms = obj_copy_time / (double) copied_bytes;
-      if (_in_marking_window)
+      if (_in_marking_window) {
         _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
-      else
+      } else {
         _cost_per_byte_ms_seq->add(cost_per_byte_ms);
+      }
     }
 
     double all_other_time_ms = pause_time_ms -
@@ -1603,15 +1610,6 @@
 
     _pending_cards_seq->add((double) _pending_cards);
     _rs_lengths_seq->add((double) _max_rs_lengths);
-
-    double expensive_region_limit_ms =
-      (double) MaxGCPauseMillis - predict_constant_other_time_ms();
-    if (expensive_region_limit_ms < 0.0) {
-      // this means that the other time was predicted to be longer than
-      // than the max pause time
-      expensive_region_limit_ms = (double) MaxGCPauseMillis;
-    }
-    _expensive_region_limit_ms = expensive_region_limit_ms;
   }
 
   _in_marking_window = new_in_marking_window;
@@ -1722,10 +1720,11 @@
   size_t rs_lengths = g1h->young_list()->sampled_rs_lengths() +
                       predict_rs_length_diff();
   size_t card_num;
-  if (full_young_gcs())
+  if (gcs_are_young()) {
     card_num = predict_young_card_num(rs_lengths);
-  else
+  } else {
     card_num = predict_non_young_card_num(rs_lengths);
+  }
   size_t young_byte_size = young_num * HeapRegion::GrainBytes;
   double accum_yg_surv_rate =
     _short_lived_surv_rate_group->accum_surv_rate(adjustment);
@@ -1745,10 +1744,11 @@
 G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) {
   size_t rs_length = predict_rs_length_diff();
   size_t card_num;
-  if (full_young_gcs())
+  if (gcs_are_young()) {
     card_num = predict_young_card_num(rs_length);
-  else
+  } else {
     card_num = predict_non_young_card_num(rs_length);
+  }
   return predict_base_elapsed_time_ms(pending_cards, card_num);
 }
 
@@ -1766,10 +1766,11 @@
                                                   bool young) {
   size_t rs_length = hr->rem_set()->occupied();
   size_t card_num;
-  if (full_young_gcs())
+  if (gcs_are_young()) {
     card_num = predict_young_card_num(rs_length);
-  else
+  } else {
     card_num = predict_non_young_card_num(rs_length);
+  }
   size_t bytes_to_copy = predict_bytes_to_copy(hr);
 
   double region_elapsed_time_ms =
@@ -1790,13 +1791,11 @@
   if (hr->is_marked())
     bytes_to_copy = hr->max_live_bytes();
   else {
-    guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1,
-               "invariant" );
+    assert(hr->is_young() && hr->age_in_surv_rate_group() != -1, "invariant");
     int age = hr->age_in_surv_rate_group();
     double yg_surv_rate = predict_yg_surv_rate(age, hr->surv_rate_group());
     bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate);
   }
-
   return bytes_to_copy;
 }
 
@@ -1812,22 +1811,6 @@
   _recorded_rs_lengths = rs_lengths;
 }
 
-void G1CollectorPolicy::check_if_region_is_too_expensive(double
-                                                           predicted_time_ms) {
-  // I don't think we need to do this when in young GC mode since
-  // marking will be initiated next time we hit the soft limit anyway...
-  if (predicted_time_ms > _expensive_region_limit_ms) {
-    ergo_verbose2(ErgoPartiallyYoungGCs,
-              "request partially-young GCs end",
-              ergo_format_reason("predicted region time higher than threshold")
-              ergo_format_ms("predicted region time")
-              ergo_format_ms("threshold"),
-              predicted_time_ms, _expensive_region_limit_ms);
-    // no point in doing another partial one
-    _should_revert_to_full_young_gcs = true;
-  }
-}
-
 void G1CollectorPolicy::update_recent_gc_times(double end_time_sec,
                                                double elapsed_ms) {
   _recent_gc_times_ms->add(elapsed_ms);
@@ -1958,11 +1941,11 @@
   if (summary->get_total_seq()->num() > 0) {
     print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
     if (body_summary != NULL) {
-      print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq());
+      print_summary(1, "Root Region Scan Wait", body_summary->get_root_region_scan_wait_seq());
       if (parallel) {
         print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
         print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(2, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(2, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(2, "Update RS", body_summary->get_update_rs_seq());
         print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
@@ -1971,7 +1954,7 @@
         {
           NumberSeq* other_parts[] = {
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq(),
@@ -1984,7 +1967,7 @@
         }
       } else {
         print_summary(1, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(1, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(1, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(1, "Update RS", body_summary->get_update_rs_seq());
         print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
@@ -2000,23 +1983,25 @@
           // parallel
           NumberSeq* other_parts[] = {
             body_summary->get_satb_drain_seq(),
+            body_summary->get_root_region_scan_wait_seq(),
             body_summary->get_parallel_seq(),
             body_summary->get_clear_ct_seq()
           };
           calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                                3, other_parts);
+                                          4, other_parts);
         } else {
           // serial
           NumberSeq* other_parts[] = {
             body_summary->get_satb_drain_seq(),
+            body_summary->get_root_region_scan_wait_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq()
           };
           calc_other_times_ms = NumberSeq(summary->get_total_seq(),
-                                                6, other_parts);
+                                          7, other_parts);
         }
         check_other_times(1,  summary->get_other_seq(), &calc_other_times_ms);
       }
@@ -2033,8 +2018,8 @@
     print_summary_sd(0, "Total", _all_pause_times_ms);
     gclog_or_tty->print_cr("");
     gclog_or_tty->print_cr("");
-    gclog_or_tty->print_cr("   Full Young GC Pauses:    %8d", _full_young_pause_num);
-    gclog_or_tty->print_cr("   Partial Young GC Pauses: %8d", _partial_young_pause_num);
+    gclog_or_tty->print_cr("   Young GC Pauses: %8d", _young_pause_num);
+    gclog_or_tty->print_cr("   Mixed GC Pauses: %8d", _mixed_pause_num);
     gclog_or_tty->print_cr("");
 
     gclog_or_tty->print_cr("EVACUATION PAUSES");
@@ -2188,11 +2173,11 @@
       // initiate a new cycle.
 
       set_during_initial_mark_pause();
-      // We do not allow non-full young GCs during marking.
-      if (!full_young_gcs()) {
-        set_full_young_gcs(true);
-        ergo_verbose0(ErgoPartiallyYoungGCs,
-                      "end partially-young GCs",
+      // We do not allow mixed GCs during marking.
+      if (!gcs_are_young()) {
+        set_gcs_are_young(true);
+        ergo_verbose0(ErgoMixedGCs,
+                      "end mixed GCs",
                       ergo_format_reason("concurrent cycle is about to start"));
       }
 
@@ -2224,12 +2209,12 @@
 }
 
 class KnownGarbageClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
   CollectionSetChooser* _hrSorted;
 
 public:
   KnownGarbageClosure(CollectionSetChooser* hrSorted) :
-    _hrSorted(hrSorted)
-  {}
+    _g1h(G1CollectedHeap::heap()), _hrSorted(hrSorted) { }
 
   bool doHeapRegion(HeapRegion* r) {
     // We only include humongous regions in collection
@@ -2238,11 +2223,10 @@
 
     // Do we have any marking information for this region?
     if (r->is_marked()) {
-      // We don't include humongous regions in collection
-      // sets because we collect them immediately at the end of a marking
-      // cycle.  We also don't include young regions because we *must*
-      // include them in the next collection pause.
-      if (!r->isHumongous() && !r->is_young()) {
+      // We will skip any region that's currently used as an old GC
+      // alloc region (we should not consider those for collection
+      // before we fill them up).
+      if (_hrSorted->shouldAdd(r) && !_g1h->is_old_gc_alloc_region(r)) {
         _hrSorted->addMarkedHeapRegion(r);
       }
     }
@@ -2251,8 +2235,10 @@
 };
 
 class ParKnownGarbageHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
   CollectionSetChooser* _hrSorted;
   jint _marked_regions_added;
+  size_t _reclaimable_bytes_added;
   jint _chunk_size;
   jint _cur_chunk_idx;
   jint _cur_chunk_end; // Cur chunk [_cur_chunk_idx, _cur_chunk_end)
@@ -2270,6 +2256,7 @@
     assert(_cur_chunk_idx < _cur_chunk_end, "postcondition");
     _hrSorted->setMarkedHeapRegion(_cur_chunk_idx, r);
     _marked_regions_added++;
+    _reclaimable_bytes_added += r->reclaimable_bytes();
     _cur_chunk_idx++;
   }
 
@@ -2277,10 +2264,10 @@
   ParKnownGarbageHRClosure(CollectionSetChooser* hrSorted,
                            jint chunk_size,
                            int worker) :
-    _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker),
-    _marked_regions_added(0), _cur_chunk_idx(0), _cur_chunk_end(0),
-    _invokes(0)
-  {}
+      _g1h(G1CollectedHeap::heap()),
+      _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker),
+      _marked_regions_added(0), _reclaimable_bytes_added(0),
+      _cur_chunk_idx(0), _cur_chunk_end(0), _invokes(0) { }
 
   bool doHeapRegion(HeapRegion* r) {
     // We only include humongous regions in collection
@@ -2290,17 +2277,17 @@
 
     // Do we have any marking information for this region?
     if (r->is_marked()) {
-      // We don't include humongous regions in collection
-      // sets because we collect them immediately at the end of a marking
-      // cycle.
-      // We also do not include young regions in collection sets
-      if (!r->isHumongous() && !r->is_young()) {
+      // We will skip any region that's currently used as an old GC
+      // alloc region (we should not consider those for collection
+      // before we fill them up).
+      if (_hrSorted->shouldAdd(r) && !_g1h->is_old_gc_alloc_region(r)) {
         add_region(r);
       }
     }
     return false;
   }
   jint marked_regions_added() { return _marked_regions_added; }
+  size_t reclaimable_bytes_added() { return _reclaimable_bytes_added; }
   int invokes() { return _invokes; }
 };
 
@@ -2312,20 +2299,23 @@
   ParKnownGarbageTask(CollectionSetChooser* hrSorted, jint chunk_size) :
     AbstractGangTask("ParKnownGarbageTask"),
     _hrSorted(hrSorted), _chunk_size(chunk_size),
-    _g1(G1CollectedHeap::heap())
-  {}
-
-  void work(int i) {
-    ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i);
+    _g1(G1CollectedHeap::heap()) { }
+
+  void work(uint worker_id) {
+    ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted,
+                                               _chunk_size,
+                                               worker_id);
     // Back to zero for the claim value.
-    _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i,
+    _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, worker_id,
                                          _g1->workers()->active_workers(),
                                          HeapRegion::InitialClaimValue);
     jint regions_added = parKnownGarbageCl.marked_regions_added();
-    _hrSorted->incNumMarkedHeapRegions(regions_added);
+    size_t reclaimable_bytes_added =
+                                   parKnownGarbageCl.reclaimable_bytes_added();
+    _hrSorted->updateTotals(regions_added, reclaimable_bytes_added);
     if (G1PrintParCleanupStats) {
       gclog_or_tty->print_cr("     Thread %d called %d times, added %d regions to list.",
-                 i, parKnownGarbageCl.invokes(), regions_added);
+                 worker_id, parKnownGarbageCl.invokes(), regions_added);
     }
   }
 };
@@ -2406,9 +2396,6 @@
   assert(_inc_cset_build_state == Active, "Precondition");
   assert(!hr->is_young(), "non-incremental add of young region");
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->registerCSetRegion(hr);
-
   assert(!hr->in_collection_set(), "should not already be in the CSet");
   hr->set_in_collection_set(true);
   hr->set_next_in_collection_set(_collection_set);
@@ -2430,10 +2417,45 @@
 
   _inc_cset_max_finger = 0;
   _inc_cset_recorded_rs_lengths = 0;
-  _inc_cset_predicted_elapsed_time_ms = 0;
+  _inc_cset_recorded_rs_lengths_diffs = 0;
+  _inc_cset_predicted_elapsed_time_ms = 0.0;
+  _inc_cset_predicted_elapsed_time_ms_diffs = 0.0;
   _inc_cset_build_state = Active;
 }
 
+void G1CollectorPolicy::finalize_incremental_cset_building() {
+  assert(_inc_cset_build_state == Active, "Precondition");
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
+
+  // The two "main" fields, _inc_cset_recorded_rs_lengths and
+  // _inc_cset_predicted_elapsed_time_ms, are updated by the thread
+  // that adds a new region to the CSet. Further updates by the
+  // concurrent refinement thread that samples the young RSet lengths
+  // are accumulated in the *_diffs fields. Here we add the diffs to
+  // the "main" fields.
+
+  if (_inc_cset_recorded_rs_lengths_diffs >= 0) {
+    _inc_cset_recorded_rs_lengths += _inc_cset_recorded_rs_lengths_diffs;
+  } else {
+    // This is defensive. The diff should in theory be always positive
+    // as RSets can only grow between GCs. However, given that we
+    // sample their size concurrently with other threads updating them
+    // it's possible that we might get the wrong size back, which
+    // could make the calculations somewhat inaccurate.
+    size_t diffs = (size_t) (-_inc_cset_recorded_rs_lengths_diffs);
+    if (_inc_cset_recorded_rs_lengths >= diffs) {
+      _inc_cset_recorded_rs_lengths -= diffs;
+    } else {
+      _inc_cset_recorded_rs_lengths = 0;
+    }
+  }
+  _inc_cset_predicted_elapsed_time_ms +=
+                                     _inc_cset_predicted_elapsed_time_ms_diffs;
+
+  _inc_cset_recorded_rs_lengths_diffs = 0;
+  _inc_cset_predicted_elapsed_time_ms_diffs = 0.0;
+}
+
 void G1CollectorPolicy::add_to_incremental_cset_info(HeapRegion* hr, size_t rs_length) {
   // This routine is used when:
   // * adding survivor regions to the incremental cset at the end of an
@@ -2449,10 +2471,8 @@
 
   double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
   size_t used_bytes = hr->used();
-
   _inc_cset_recorded_rs_lengths += rs_length;
   _inc_cset_predicted_elapsed_time_ms += region_elapsed_time_ms;
-
   _inc_cset_bytes_used_before += used_bytes;
 
   // Cache the values we have added to the aggregated informtion
@@ -2463,37 +2483,33 @@
   hr->set_predicted_elapsed_time_ms(region_elapsed_time_ms);
 }
 
-void G1CollectorPolicy::remove_from_incremental_cset_info(HeapRegion* hr) {
-  // This routine is currently only called as part of the updating of
-  // existing policy information for regions in the incremental cset that
-  // is performed by the concurrent refine thread(s) as part of young list
-  // RSet sampling. Therefore we should not be at a safepoint.
-
-  assert(!SafepointSynchronize::is_at_safepoint(), "should not be at safepoint");
-  assert(hr->is_young(), "it should be");
-
-  size_t used_bytes = hr->used();
-  size_t old_rs_length = hr->recorded_rs_length();
+void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr,
+                                                     size_t new_rs_length) {
+  // Update the CSet information that is dependent on the new RS length
+  assert(hr->is_young(), "Precondition");
+  assert(!SafepointSynchronize::is_at_safepoint(),
+                                               "should not be at a safepoint");
+
+  // We could have updated _inc_cset_recorded_rs_lengths and
+  // _inc_cset_predicted_elapsed_time_ms directly but we'd need to do
+  // that atomically, as this code is executed by a concurrent
+  // refinement thread, potentially concurrently with a mutator thread
+  // allocating a new region and also updating the same fields. To
+  // avoid the atomic operations we accumulate these updates on two
+  // separate fields (*_diffs) and we'll just add them to the "main"
+  // fields at the start of a GC.
+
+  ssize_t old_rs_length = (ssize_t) hr->recorded_rs_length();
+  ssize_t rs_lengths_diff = (ssize_t) new_rs_length - old_rs_length;
+  _inc_cset_recorded_rs_lengths_diffs += rs_lengths_diff;
+
   double old_elapsed_time_ms = hr->predicted_elapsed_time_ms();
-
-  // Subtract the old recorded/predicted policy information for
-  // the given heap region from the collection set info.
-  _inc_cset_recorded_rs_lengths -= old_rs_length;
-  _inc_cset_predicted_elapsed_time_ms -= old_elapsed_time_ms;
-
-  _inc_cset_bytes_used_before -= used_bytes;
-
-  // Clear the values cached in the heap region
-  hr->set_recorded_rs_length(0);
-  hr->set_predicted_elapsed_time_ms(0);
-}
-
-void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr, size_t new_rs_length) {
-  // Update the collection set information that is dependent on the new RS length
-  assert(hr->is_young(), "Precondition");
-
-  remove_from_incremental_cset_info(hr);
-  add_to_incremental_cset_info(hr, new_rs_length);
+  double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
+  double elapsed_ms_diff = new_region_elapsed_time_ms - old_elapsed_time_ms;
+  _inc_cset_predicted_elapsed_time_ms_diffs += elapsed_ms_diff;
+
+  hr->set_recorded_rs_length(new_rs_length);
+  hr->set_predicted_elapsed_time_ms(new_region_elapsed_time_ms);
 }
 
 void G1CollectorPolicy::add_region_to_incremental_cset_common(HeapRegion* hr) {
@@ -2580,11 +2596,48 @@
 }
 #endif // !PRODUCT
 
-void G1CollectorPolicy::choose_collection_set(double target_pause_time_ms) {
+bool G1CollectorPolicy::next_gc_should_be_mixed(const char* true_action_str,
+                                                const char* false_action_str) {
+  CollectionSetChooser* cset_chooser = _collectionSetChooser;
+  if (cset_chooser->isEmpty()) {
+    ergo_verbose0(ErgoMixedGCs,
+                  false_action_str,
+                  ergo_format_reason("candidate old regions not available"));
+    return false;
+  }
+  size_t reclaimable_bytes = cset_chooser->remainingReclaimableBytes();
+  size_t capacity_bytes = _g1->capacity();
+  double perc = (double) reclaimable_bytes * 100.0 / (double) capacity_bytes;
+  double threshold = (double) G1OldReclaimableThresholdPercent;
+  if (perc < threshold) {
+    ergo_verbose4(ErgoMixedGCs,
+              false_action_str,
+              ergo_format_reason("reclaimable percentage lower than threshold")
+              ergo_format_region("candidate old regions")
+              ergo_format_byte_perc("reclaimable")
+              ergo_format_perc("threshold"),
+              cset_chooser->remainingRegions(),
+              reclaimable_bytes, perc, threshold);
+    return false;
+  }
+
+  ergo_verbose4(ErgoMixedGCs,
+                true_action_str,
+                ergo_format_reason("candidate old regions available")
+                ergo_format_region("candidate old regions")
+                ergo_format_byte_perc("reclaimable")
+                ergo_format_perc("threshold"),
+                cset_chooser->remainingRegions(),
+                reclaimable_bytes, perc, threshold);
+  return true;
+}
+
+void G1CollectorPolicy::finalize_cset(double target_pause_time_ms) {
   // Set this here - in case we're not doing young collections.
   double non_young_start_time_sec = os::elapsedTime();
 
   YoungList* young_list = _g1->young_list();
+  finalize_incremental_cset_building();
 
   guarantee(target_pause_time_ms > 0.0,
             err_msg("target_pause_time_ms = %1.6lf should be positive",
@@ -2593,7 +2646,6 @@
 
   double base_time_ms = predict_base_elapsed_time_ms(_pending_cards);
   double predicted_pause_time_ms = base_time_ms;
-
   double time_remaining_ms = target_pause_time_ms - base_time_ms;
 
   ergo_verbose3(ErgoCSetConstruction | ErgoHigh,
@@ -2603,32 +2655,16 @@
                 ergo_format_ms("target pause time"),
                 base_time_ms, time_remaining_ms, target_pause_time_ms);
 
-  // the 10% and 50% values are arbitrary...
-  double threshold = 0.10 * target_pause_time_ms;
-  if (time_remaining_ms < threshold) {
-    double prev_time_remaining_ms = time_remaining_ms;
-    time_remaining_ms = 0.50 * target_pause_time_ms;
-    ergo_verbose3(ErgoCSetConstruction,
-                  "adjust remaining time",
-                  ergo_format_reason("remaining time lower than threshold")
-                  ergo_format_ms("remaining time")
-                  ergo_format_ms("threshold")
-                  ergo_format_ms("adjusted remaining time"),
-                  prev_time_remaining_ms, threshold, time_remaining_ms);
-  }
-
-  size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes;
-
   HeapRegion* hr;
   double young_start_time_sec = os::elapsedTime();
 
   _collection_set_bytes_used_before = 0;
-  _last_young_gc_full = full_young_gcs() ? true : false;
-
-  if (_last_young_gc_full) {
-    ++_full_young_pause_num;
+  _last_gc_was_young = gcs_are_young() ? true : false;
+
+  if (_last_gc_was_young) {
+    ++_young_pause_num;
   } else {
-    ++_partial_young_pause_num;
+    ++_mixed_pause_num;
   }
 
   // The young list is laid with the survivor regions from the previous
@@ -2648,9 +2684,6 @@
   // Clear the fields that point to the survivor list - they are all young now.
   young_list->clear_survivors();
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger);
-
   _collection_set = _inc_cset_head;
   _collection_set_bytes_used_before = _inc_cset_bytes_used_before;
   time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms;
@@ -2675,79 +2708,98 @@
   // We are doing young collections so reset this.
   non_young_start_time_sec = young_end_time_sec;
 
-  if (!full_young_gcs()) {
-    bool should_continue = true;
-    NumberSeq seq;
-    double avg_prediction = 100000000000000000.0; // something very large
-
-    double prev_predicted_pause_time_ms = predicted_pause_time_ms;
-    do {
-      // Note that add_old_region_to_cset() increments the
-      // _old_cset_region_length field and cset_region_length() returns the
-      // sum of _eden_cset_region_length, _survivor_cset_region_length, and
-      // _old_cset_region_length. So, as old regions are added to the
-      // CSet, _old_cset_region_length will be incremented and
-      // cset_region_length(), which is used below, will always reflect
-      // the the total number of regions added up to this point to the CSet.
-
-      hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
-                                                      avg_prediction);
-      if (hr != NULL) {
-        _g1->old_set_remove(hr);
-        double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
-        time_remaining_ms -= predicted_time_ms;
-        predicted_pause_time_ms += predicted_time_ms;
-        add_old_region_to_cset(hr);
-        seq.add(predicted_time_ms);
-        avg_prediction = seq.avg() + seq.sd();
+  if (!gcs_are_young()) {
+    CollectionSetChooser* cset_chooser = _collectionSetChooser;
+    assert(cset_chooser->verify(), "CSet Chooser verification - pre");
+    const size_t min_old_cset_length = cset_chooser->calcMinOldCSetLength();
+    const size_t max_old_cset_length = cset_chooser->calcMaxOldCSetLength();
+
+    size_t expensive_region_num = 0;
+    bool check_time_remaining = adaptive_young_list_length();
+    HeapRegion* hr = cset_chooser->peek();
+    while (hr != NULL) {
+      if (old_cset_region_length() >= max_old_cset_length) {
+        // Added maximum number of old regions to the CSet.
+        ergo_verbose2(ErgoCSetConstruction,
+                      "finish adding old regions to CSet",
+                      ergo_format_reason("old CSet region num reached max")
+                      ergo_format_region("old")
+                      ergo_format_region("max"),
+                      old_cset_region_length(), max_old_cset_length);
+        break;
       }
 
-      should_continue = true;
-      if (hr == NULL) {
-        // No need for an ergo verbose message here,
-        // getNextMarkRegion() does this when it returns NULL.
-        should_continue = false;
+      double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
+      if (check_time_remaining) {
+        if (predicted_time_ms > time_remaining_ms) {
+          // Too expensive for the current CSet.
+
+          if (old_cset_region_length() >= min_old_cset_length) {
+            // We have added the minimum number of old regions to the CSet,
+            // we are done with this CSet.
+            ergo_verbose4(ErgoCSetConstruction,
+                          "finish adding old regions to CSet",
+                          ergo_format_reason("predicted time is too high")
+                          ergo_format_ms("predicted time")
+                          ergo_format_ms("remaining time")
+                          ergo_format_region("old")
+                          ergo_format_region("min"),
+                          predicted_time_ms, time_remaining_ms,
+                          old_cset_region_length(), min_old_cset_length);
+            break;
+          }
+
+          // We'll add it anyway given that we haven't reached the
+          // minimum number of old regions.
+          expensive_region_num += 1;
+        }
       } else {
-        if (adaptive_young_list_length()) {
-          if (time_remaining_ms < 0.0) {
-            ergo_verbose1(ErgoCSetConstruction,
-                          "stop adding old regions to CSet",
-                          ergo_format_reason("remaining time is lower than 0")
-                          ergo_format_ms("remaining time"),
-                          time_remaining_ms);
-            should_continue = false;
-          }
-        } else {
-          if (cset_region_length() >= _young_list_fixed_length) {
-            ergo_verbose2(ErgoCSetConstruction,
-                          "stop adding old regions to CSet",
-                          ergo_format_reason("CSet length reached target")
-                          ergo_format_region("CSet")
-                          ergo_format_region("young target"),
-                          cset_region_length(), _young_list_fixed_length);
-            should_continue = false;
-          }
+        if (old_cset_region_length() >= min_old_cset_length) {
+          // In the non-auto-tuning case, we'll finish adding regions
+          // to the CSet if we reach the minimum.
+          ergo_verbose2(ErgoCSetConstruction,
+                        "finish adding old regions to CSet",
+                        ergo_format_reason("old CSet region num reached min")
+                        ergo_format_region("old")
+                        ergo_format_region("min"),
+                        old_cset_region_length(), min_old_cset_length);
+          break;
         }
       }
-    } while (should_continue);
-
-    if (!adaptive_young_list_length() &&
-                             cset_region_length() < _young_list_fixed_length) {
-      ergo_verbose2(ErgoCSetConstruction,
-                    "request partially-young GCs end",
-                    ergo_format_reason("CSet length lower than target")
-                    ergo_format_region("CSet")
-                    ergo_format_region("young target"),
-                    cset_region_length(), _young_list_fixed_length);
-      _should_revert_to_full_young_gcs  = true;
+
+      // We will add this region to the CSet.
+      time_remaining_ms -= predicted_time_ms;
+      predicted_pause_time_ms += predicted_time_ms;
+      cset_chooser->remove_and_move_to_next(hr);
+      _g1->old_set_remove(hr);
+      add_old_region_to_cset(hr);
+
+      hr = cset_chooser->peek();
+    }
+    if (hr == NULL) {
+      ergo_verbose0(ErgoCSetConstruction,
+                    "finish adding old regions to CSet",
+                    ergo_format_reason("candidate old regions not available"));
     }
 
-    ergo_verbose2(ErgoCSetConstruction | ErgoHigh,
-                  "add old regions to CSet",
-                  ergo_format_region("old")
-                  ergo_format_ms("predicted old region time"),
-                  old_cset_region_length(),
-                  predicted_pause_time_ms - prev_predicted_pause_time_ms);
+    if (expensive_region_num > 0) {
+      // We print the information once here at the end, predicated on
+      // whether we added any apparently expensive regions or not, to
+      // avoid generating output per region.
+      ergo_verbose4(ErgoCSetConstruction,
+                    "added expensive regions to CSet",
+                    ergo_format_reason("old CSet region num not reached min")
+                    ergo_format_region("old")
+                    ergo_format_region("expensive")
+                    ergo_format_region("min")
+                    ergo_format_ms("remaining time"),
+                    old_cset_region_length(),
+                    expensive_region_num,
+                    min_old_cset_length,
+                    time_remaining_ms);
+    }
+
+    assert(cset_chooser->verify(), "CSet Chooser verification - post");
   }
 
   stop_incremental_cset_building();
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,9 +65,10 @@
 
 class MainBodySummary: public CHeapObj {
   define_num_seq(satb_drain) // optional
+  define_num_seq(root_region_scan_wait)
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
-    define_num_seq(mark_stack_scan)
+    define_num_seq(satb_filtering)
     define_num_seq(update_rs)
     define_num_seq(scan_rs)
     define_num_seq(obj_copy)
@@ -83,6 +84,72 @@
   virtual MainBodySummary*    main_body_summary()    { return this; }
 };
 
+// There are three command line options related to the young gen size:
+// NewSize, MaxNewSize and NewRatio (There is also -Xmn, but that is
+// just a short form for NewSize==MaxNewSize). G1 will use its internal
+// heuristics to calculate the actual young gen size, so these options
+// basically only limit the range within which G1 can pick a young gen
+// size. Also, these are general options taking byte sizes. G1 will
+// internally work with a number of regions instead. So, some rounding
+// will occur.
+//
+// If nothing related to the young gen size is set on the command
+// line we should allow the young gen to be between
+// G1DefaultMinNewGenPercent and G1DefaultMaxNewGenPercent of the
+// heap size. This means that every time the heap size changes the
+// limits for the young gen size will be updated.
+//
+// If only -XX:NewSize is set we should use the specified value as the
+// minimum size for young gen. Still using G1DefaultMaxNewGenPercent
+// of the heap as maximum.
+//
+// If only -XX:MaxNewSize is set we should use the specified value as the
+// maximum size for young gen. Still using G1DefaultMinNewGenPercent
+// of the heap as minimum.
+//
+// If -XX:NewSize and -XX:MaxNewSize are both specified we use these values.
+// No updates when the heap size changes. There is a special case when
+// NewSize==MaxNewSize. This is interpreted as "fixed" and will use a
+// different heuristic for calculating the collection set when we do mixed
+// collection.
+//
+// If only -XX:NewRatio is set we should use the specified ratio of the heap
+// as both min and max. This will be interpreted as "fixed" just like the
+// NewSize==MaxNewSize case above. But we will update the min and max
+// every time the heap size changes.
+//
+// NewSize and MaxNewSize override NewRatio. So, NewRatio is ignored if it is
+// combined with either NewSize or MaxNewSize. (A warning message is printed.)
+class G1YoungGenSizer : public CHeapObj {
+private:
+  enum SizerKind {
+    SizerDefaults,
+    SizerNewSizeOnly,
+    SizerMaxNewSizeOnly,
+    SizerMaxAndNewSize,
+    SizerNewRatio
+  };
+  SizerKind _sizer_kind;
+  size_t _min_desired_young_length;
+  size_t _max_desired_young_length;
+  bool _adaptive_size;
+  size_t calculate_default_min_length(size_t new_number_of_heap_regions);
+  size_t calculate_default_max_length(size_t new_number_of_heap_regions);
+
+public:
+  G1YoungGenSizer();
+  void heap_size_changed(size_t new_number_of_heap_regions);
+  size_t min_desired_young_length() {
+    return _min_desired_young_length;
+  }
+  size_t max_desired_young_length() {
+    return _max_desired_young_length;
+  }
+  bool adaptive_young_list_length() {
+    return _adaptive_size;
+  }
+};
+
 class G1CollectorPolicy: public CollectorPolicy {
 private:
   // either equal to the number of parallel threads, if ParallelGCThreads
@@ -111,7 +178,6 @@
   double _cur_collection_start_sec;
   size_t _cur_collection_pause_used_at_start_bytes;
   size_t _cur_collection_pause_used_regions_at_start;
-  size_t _prev_collection_pause_used_at_end_bytes;
   double _cur_collection_par_time_ms;
   double _cur_satb_drain_time_ms;
   double _cur_clear_ct_time_ms;
@@ -149,7 +215,7 @@
 
   double* _par_last_gc_worker_start_times_ms;
   double* _par_last_ext_root_scan_times_ms;
-  double* _par_last_mark_stack_scan_times_ms;
+  double* _par_last_satb_filtering_times_ms;
   double* _par_last_update_rs_times_ms;
   double* _par_last_update_rs_processed_buffers;
   double* _par_last_scan_rs_times_ms;
@@ -164,12 +230,9 @@
   // times for a given worker thread.
   double* _par_last_gc_worker_other_times_ms;
 
-  // indicates whether we are in full young or partially young GC mode
-  bool _full_young_gcs;
+  // indicates whether we are in young or mixed GC mode
+  bool _gcs_are_young;
 
-  // if true, then it tries to dynamically adjust the length of the
-  // young list
-  bool _adaptive_young_list_length;
   size_t _young_list_target_length;
   size_t _young_list_fixed_length;
   size_t _prev_eden_capacity; // used for logging
@@ -178,10 +241,10 @@
   // locker is active. This should be >= _young_list_target_length;
   size_t _young_list_max_length;
 
-  bool   _last_young_gc_full;
+  bool                  _last_gc_was_young;
 
-  unsigned              _full_young_pause_num;
-  unsigned              _partial_young_pause_num;
+  unsigned              _young_pause_num;
+  unsigned              _mixed_pause_num;
 
   bool                  _during_marking;
   bool                  _in_marking_window;
@@ -211,10 +274,10 @@
   TruncatedSeq* _pending_card_diff_seq;
   TruncatedSeq* _rs_length_diff_seq;
   TruncatedSeq* _cost_per_card_ms_seq;
-  TruncatedSeq* _fully_young_cards_per_entry_ratio_seq;
-  TruncatedSeq* _partially_young_cards_per_entry_ratio_seq;
+  TruncatedSeq* _young_cards_per_entry_ratio_seq;
+  TruncatedSeq* _mixed_cards_per_entry_ratio_seq;
   TruncatedSeq* _cost_per_entry_ms_seq;
-  TruncatedSeq* _partially_young_cost_per_entry_ms_seq;
+  TruncatedSeq* _mixed_cost_per_entry_ms_seq;
   TruncatedSeq* _cost_per_byte_ms_seq;
   TruncatedSeq* _constant_other_time_ms_seq;
   TruncatedSeq* _young_other_cost_per_region_ms_seq;
@@ -227,9 +290,7 @@
 
   TruncatedSeq* _young_gc_eff_seq;
 
-  bool   _using_new_ratio_calculations;
-  size_t _min_desired_young_length; // as set on the command line or default calculations
-  size_t _max_desired_young_length; // as set on the command line or default calculations
+  G1YoungGenSizer* _young_gen_sizer;
 
   size_t _eden_cset_region_length;
   size_t _survivor_cset_region_length;
@@ -251,16 +312,13 @@
   double _recorded_non_young_free_cset_time_ms;
 
   double _sigma;
-  double _expensive_region_limit_ms;
 
   size_t _rs_lengths_prediction;
 
   size_t _known_garbage_bytes;
   double _known_garbage_ratio;
 
-  double sigma() {
-    return _sigma;
-  }
+  double sigma() { return _sigma; }
 
   // A function that prevents us putting too much stock in small sample
   // sets.  Returns a number between 2.0 and 1.0, depending on the number
@@ -322,20 +380,22 @@
 
   size_t predict_pending_card_diff() {
     double prediction = get_new_neg_prediction(_pending_card_diff_seq);
-    if (prediction < 0.00001)
+    if (prediction < 0.00001) {
       return 0;
-    else
+    } else {
       return (size_t) prediction;
+    }
   }
 
   size_t predict_pending_cards() {
     size_t max_pending_card_num = _g1->max_pending_card_num();
     size_t diff = predict_pending_card_diff();
     size_t prediction;
-    if (diff > max_pending_card_num)
+    if (diff > max_pending_card_num) {
       prediction = max_pending_card_num;
-    else
+    } else {
       prediction = max_pending_card_num - diff;
+    }
 
     return prediction;
   }
@@ -356,57 +416,62 @@
     return (double) pending_cards * predict_cost_per_card_ms();
   }
 
-  double predict_fully_young_cards_per_entry_ratio() {
-    return get_new_prediction(_fully_young_cards_per_entry_ratio_seq);
+  double predict_young_cards_per_entry_ratio() {
+    return get_new_prediction(_young_cards_per_entry_ratio_seq);
   }
 
-  double predict_partially_young_cards_per_entry_ratio() {
-    if (_partially_young_cards_per_entry_ratio_seq->num() < 2)
-      return predict_fully_young_cards_per_entry_ratio();
-    else
-      return get_new_prediction(_partially_young_cards_per_entry_ratio_seq);
+  double predict_mixed_cards_per_entry_ratio() {
+    if (_mixed_cards_per_entry_ratio_seq->num() < 2) {
+      return predict_young_cards_per_entry_ratio();
+    } else {
+      return get_new_prediction(_mixed_cards_per_entry_ratio_seq);
+    }
   }
 
   size_t predict_young_card_num(size_t rs_length) {
     return (size_t) ((double) rs_length *
-                     predict_fully_young_cards_per_entry_ratio());
+                     predict_young_cards_per_entry_ratio());
   }
 
   size_t predict_non_young_card_num(size_t rs_length) {
     return (size_t) ((double) rs_length *
-                     predict_partially_young_cards_per_entry_ratio());
+                     predict_mixed_cards_per_entry_ratio());
   }
 
   double predict_rs_scan_time_ms(size_t card_num) {
-    if (full_young_gcs())
+    if (gcs_are_young()) {
       return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq);
-    else
-      return predict_partially_young_rs_scan_time_ms(card_num);
+    } else {
+      return predict_mixed_rs_scan_time_ms(card_num);
+    }
   }
 
-  double predict_partially_young_rs_scan_time_ms(size_t card_num) {
-    if (_partially_young_cost_per_entry_ms_seq->num() < 3)
+  double predict_mixed_rs_scan_time_ms(size_t card_num) {
+    if (_mixed_cost_per_entry_ms_seq->num() < 3) {
       return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq);
-    else
-      return (double) card_num *
-        get_new_prediction(_partially_young_cost_per_entry_ms_seq);
+    } else {
+      return (double) (card_num *
+                       get_new_prediction(_mixed_cost_per_entry_ms_seq));
+    }
   }
 
   double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) {
-    if (_cost_per_byte_ms_during_cm_seq->num() < 3)
-      return 1.1 * (double) bytes_to_copy *
-        get_new_prediction(_cost_per_byte_ms_seq);
-    else
+    if (_cost_per_byte_ms_during_cm_seq->num() < 3) {
+      return (1.1 * (double) bytes_to_copy) *
+              get_new_prediction(_cost_per_byte_ms_seq);
+    } else {
       return (double) bytes_to_copy *
-        get_new_prediction(_cost_per_byte_ms_during_cm_seq);
+             get_new_prediction(_cost_per_byte_ms_during_cm_seq);
+    }
   }
 
   double predict_object_copy_time_ms(size_t bytes_to_copy) {
-    if (_in_marking_window && !_in_marking_window_im)
+    if (_in_marking_window && !_in_marking_window_im) {
       return predict_object_copy_time_ms_during_cm(bytes_to_copy);
-    else
+    } else {
       return (double) bytes_to_copy *
-        get_new_prediction(_cost_per_byte_ms_seq);
+              get_new_prediction(_cost_per_byte_ms_seq);
+    }
   }
 
   double predict_constant_other_time_ms() {
@@ -414,19 +479,15 @@
   }
 
   double predict_young_other_time_ms(size_t young_num) {
-    return
-      (double) young_num *
-      get_new_prediction(_young_other_cost_per_region_ms_seq);
+    return (double) young_num *
+           get_new_prediction(_young_other_cost_per_region_ms_seq);
   }
 
   double predict_non_young_other_time_ms(size_t non_young_num) {
-    return
-      (double) non_young_num *
-      get_new_prediction(_non_young_other_cost_per_region_ms_seq);
+    return (double) non_young_num *
+           get_new_prediction(_non_young_other_cost_per_region_ms_seq);
   }
 
-  void check_if_region_is_too_expensive(double predicted_time_ms);
-
   double predict_young_collection_elapsed_time_ms(size_t adjustment);
   double predict_base_elapsed_time_ms(size_t pending_cards);
   double predict_base_elapsed_time_ms(size_t pending_cards,
@@ -456,7 +517,7 @@
   double predict_survivor_regions_evac_time();
 
   void cset_regions_freed() {
-    bool propagate = _last_young_gc_full && !_in_marking_window;
+    bool propagate = _last_gc_was_young && !_in_marking_window;
     _short_lived_surv_rate_group->all_surviving_words_recorded(propagate);
     _survivor_surv_rate_group->all_surviving_words_recorded(propagate);
     // also call it on any more surv rate groups
@@ -583,16 +644,29 @@
   // Used to record the highest end of heap region in collection set
   HeapWord* _inc_cset_max_finger;
 
-  // The RSet lengths recorded for regions in the collection set
-  // (updated by the periodic sampling of the regions in the
-  // young list/collection set).
+  // The RSet lengths recorded for regions in the CSet. It is updated
+  // by the thread that adds a new region to the CSet. We assume that
+  // only one thread can be allocating a new CSet region (currently,
+  // it does so after taking the Heap_lock) hence no need to
+  // synchronize updates to this field.
   size_t _inc_cset_recorded_rs_lengths;
 
-  // The predicted elapsed time it will take to collect the regions
-  // in the collection set (updated by the periodic sampling of the
-  // regions in the young list/collection set).
+  // A concurrent refinement thread periodically samples the young
+  // region RSets and needs to update _inc_cset_recorded_rs_lengths as
+  // the RSets grow. Instead of having to synchronize updates to that
+  // field we accumulate them in this field and add it to
+  // _inc_cset_recorded_rs_lengths at the start of a GC.
+  ssize_t _inc_cset_recorded_rs_lengths_diffs;
+
+  // The predicted elapsed time it will take to collect the regions in
+  // the CSet. This is updated by the thread that adds a new region to
+  // the CSet. See the comment for _inc_cset_recorded_rs_lengths about
+  // MT-safety assumptions.
   double _inc_cset_predicted_elapsed_time_ms;
 
+  // See the comment for _inc_cset_recorded_rs_lengths_diffs.
+  double _inc_cset_predicted_elapsed_time_ms_diffs;
+
   // Stash a pointer to the g1 heap.
   G1CollectedHeap* _g1;
 
@@ -628,8 +702,7 @@
   // initial-mark work.
   volatile bool _during_initial_mark_pause;
 
-  bool _should_revert_to_full_young_gcs;
-  bool _last_full_young_gc;
+  bool _last_young_gc;
 
   // This set of variables tracks the collector efficiency, in order to
   // determine whether we should initiate a new marking.
@@ -637,6 +710,7 @@
   double _mark_remark_start_sec;
   double _mark_cleanup_start_sec;
   double _mark_closure_time_ms;
+  double _root_region_scan_wait_time_ms;
 
   // Update the young list target length either by setting it to the
   // desired fixed value or by calculating it using G1's pause
@@ -677,8 +751,6 @@
   // Count the number of bytes used in the CS.
   void count_CS_bytes_used();
 
-  void update_young_list_size_using_newratio(size_t number_of_heap_regions);
-
 public:
 
   G1CollectorPolicy();
@@ -705,8 +777,6 @@
   // This should be called after the heap is resized.
   void record_new_heap_size(size_t new_number_of_regions);
 
-public:
-
   void init();
 
   // Create jstat counters for the policy.
@@ -725,6 +795,8 @@
 
   GenRemSet::Name  rem_set_name()     { return GenRemSet::CardTable; }
 
+  bool need_to_start_conc_mark(const char* source, size_t alloc_word_size = 0);
+
   // Update the heuristic info to record a collection pause of the given
   // start time, where the given number of bytes were used at the start.
   // This may involve changing the desired size of a collection set.
@@ -741,6 +813,10 @@
     _mark_closure_time_ms = mark_closure_time_ms;
   }
 
+  void record_root_region_scan_wait_time(double time_ms) {
+    _root_region_scan_wait_time_ms = time_ms;
+  }
+
   void record_concurrent_mark_remark_start();
   void record_concurrent_mark_remark_end();
 
@@ -766,8 +842,8 @@
     _par_last_ext_root_scan_times_ms[worker_i] = ms;
   }
 
-  void record_mark_stack_scan_time(int worker_i, double ms) {
-    _par_last_mark_stack_scan_times_ms[worker_i] = ms;
+  void record_satb_filtering_time(int worker_i, double ms) {
+    _par_last_satb_filtering_times_ms[worker_i] = ms;
   }
 
   void record_satb_drain_time(double ms) {
@@ -864,10 +940,17 @@
     return _bytes_copied_during_gc;
   }
 
+  // Determine whether the next GC should be mixed. Called to determine
+  // whether to start mixed GCs or whether to carry on doing mixed
+  // GCs. The two action strings are used in the ergo output when the
+  // method returns true or false.
+  bool next_gc_should_be_mixed(const char* true_action_str,
+                               const char* false_action_str);
+
   // Choose a new collection set.  Marks the chosen regions as being
   // "in_collection_set", and links them together.  The head and number of
   // the collection set are available via access methods.
-  void choose_collection_set(double target_pause_time_ms);
+  void finalize_cset(double target_pause_time_ms);
 
   // The head of the list (via "next_in_collection_set()") representing the
   // current collection set.
@@ -889,6 +972,10 @@
   // Initialize incremental collection set info.
   void start_incremental_cset_building();
 
+  // Perform any final calculations on the incremental CSet fields
+  // before we can use them.
+  void finalize_incremental_cset_building();
+
   void clear_incremental_cset() {
     _inc_cset_head = NULL;
     _inc_cset_tail = NULL;
@@ -897,10 +984,9 @@
   // Stop adding regions to the incremental collection set
   void stop_incremental_cset_building() { _inc_cset_build_state = Inactive; }
 
-  // Add/remove information about hr to the aggregated information
-  // for the incrementally built collection set.
+  // Add information about hr to the aggregated information for the
+  // incrementally built collection set.
   void add_to_incremental_cset_info(HeapRegion* hr, size_t rs_length);
-  void remove_from_incremental_cset_info(HeapRegion* hr);
 
   // Update information about hr in the aggregated information for
   // the incrementally built collection set.
@@ -985,18 +1071,15 @@
     return _young_list_max_length;
   }
 
-  bool full_young_gcs() {
-    return _full_young_gcs;
+  bool gcs_are_young() {
+    return _gcs_are_young;
   }
-  void set_full_young_gcs(bool full_young_gcs) {
-    _full_young_gcs = full_young_gcs;
+  void set_gcs_are_young(bool gcs_are_young) {
+    _gcs_are_young = gcs_are_young;
   }
 
   bool adaptive_young_list_length() {
-    return _adaptive_young_list_length;
-  }
-  void set_adaptive_young_list_length(bool adaptive_young_list_length) {
-    _adaptive_young_list_length = adaptive_young_list_length;
+    return _young_gen_sizer->adaptive_young_list_length();
   }
 
   inline double get_gc_eff_factor() {
--- a/src/share/vm/gc_implementation/g1/g1ErgoVerbose.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -52,14 +52,13 @@
 const char* G1ErgoVerbose::to_string(int tag) {
   ErgoHeuristic n = extract_heuristic(tag);
   switch (n) {
-  case ErgoHeapSizing:            return "Heap Sizing";
-  case ErgoCSetConstruction:      return "CSet Construction";
-  case ErgoConcCycles:            return "Concurrent Cycles";
-  case ErgoPartiallyYoungGCs:     return "Partially-Young GCs";
+  case ErgoHeapSizing:        return "Heap Sizing";
+  case ErgoCSetConstruction:  return "CSet Construction";
+  case ErgoConcCycles:        return "Concurrent Cycles";
+  case ErgoMixedGCs:          return "Mixed GCs";
   default:
     ShouldNotReachHere();
     // Keep the Windows compiler happy
     return NULL;
   }
 }
-
--- a/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -69,7 +69,7 @@
   ErgoHeapSizing = 0,
   ErgoCSetConstruction,
   ErgoConcCycles,
-  ErgoPartiallyYoungGCs,
+  ErgoMixedGCs,
 
   ErgoHeuristicNum
 } ErgoHeuristic;
@@ -131,8 +131,8 @@
                              ", " _name_ ": "SIZE_FORMAT" bytes (%1.2f %%)"
 
 // Generates the format string
-#define ergo_format(_action_, _extra_format_)                   \
-  " %1.3f: [G1Ergonomics (%s) " _action_ _extra_format_ "]"
+#define ergo_format(_extra_format_)                           \
+  " %1.3f: [G1Ergonomics (%s) %s" _extra_format_ "]"
 
 // Conditionally, prints an ergonomic decision record. _extra_format_
 // is the format string for the optional items we'd like to print
@@ -145,20 +145,21 @@
 // them to the print method. For convenience, we have wrapper macros
 // below which take a specific number of arguments and set the rest to
 // a default value.
-#define ergo_verbose_common(_tag_, _action_, _extra_format_,            \
+#define ergo_verbose_common(_tag_, _action_, _extra_format_,                \
                             _arg0_, _arg1_, _arg2_, _arg3_, _arg4_, _arg5_) \
-  do {                                                                  \
-    if (G1ErgoVerbose::enabled((_tag_))) {                              \
-      gclog_or_tty->print_cr(ergo_format(_action_, _extra_format_),     \
-                             os::elapsedTime(),                         \
-                             G1ErgoVerbose::to_string((_tag_)),         \
-                             (_arg0_), (_arg1_), (_arg2_),              \
-                             (_arg3_), (_arg4_), (_arg5_));             \
-    }                                                                   \
+  do {                                                                      \
+    if (G1ErgoVerbose::enabled((_tag_))) {                                  \
+      gclog_or_tty->print_cr(ergo_format(_extra_format_),                   \
+                             os::elapsedTime(),                             \
+                             G1ErgoVerbose::to_string((_tag_)),             \
+                             (_action_),                                    \
+                             (_arg0_), (_arg1_), (_arg2_),                  \
+                             (_arg3_), (_arg4_), (_arg5_));                 \
+    }                                                                       \
   } while (0)
 
 
-#define ergo_verbose(_tag_, _action_)                           \
+#define ergo_verbose(_tag_, _action_)                                   \
   ergo_verbose_common(_tag_, _action_, "", 0, 0, 0, 0, 0, 0)
 
 #define ergo_verbose0(_tag_, _action_, _extra_format_)                  \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
+
+#include "gc_implementation/g1/concurrentMark.inline.hpp"
+#include "gc_implementation/g1/dirtyCardQueue.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1_globals.hpp"
+#include "gc_implementation/g1/g1OopClosures.inline.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
+#include "utilities/workgroup.hpp"
+
+// Closures and tasks associated with any self-forwarding pointers
+// installed as a result of an evacuation failure.
+
+class UpdateRSetDeferred : public OopsInHeapRegionClosure {
+private:
+  G1CollectedHeap* _g1;
+  DirtyCardQueue *_dcq;
+  CardTableModRefBS* _ct_bs;
+
+public:
+  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
+    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    assert(_from->is_in_reserved(p), "paranoia");
+    if (!_from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) &&
+        !_from->is_survivor()) {
+      size_t card_index = _ct_bs->index_for(p);
+      if (_ct_bs->mark_card_deferred(card_index)) {
+        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
+      }
+    }
+  }
+};
+
+class RemoveSelfForwardPtrObjClosure: public ObjectClosure {
+private:
+  G1CollectedHeap* _g1;
+  ConcurrentMark* _cm;
+  HeapRegion* _hr;
+  size_t _marked_bytes;
+  OopsInHeapRegionClosure *_update_rset_cl;
+  bool _during_initial_mark;
+  bool _during_conc_mark;
+  uint _worker_id;
+
+public:
+  RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
+                                 HeapRegion* hr,
+                                 OopsInHeapRegionClosure* update_rset_cl,
+                                 bool during_initial_mark,
+                                 bool during_conc_mark,
+                                 uint worker_id) :
+    _g1(g1), _cm(cm), _hr(hr), _marked_bytes(0),
+    _update_rset_cl(update_rset_cl),
+    _during_initial_mark(during_initial_mark),
+    _during_conc_mark(during_conc_mark),
+    _worker_id(worker_id) { }
+
+  size_t marked_bytes() { return _marked_bytes; }
+
+  // <original comment>
+  // The original idea here was to coalesce evacuated and dead objects.
+  // However that caused complications with the block offset table (BOT).
+  // In particular if there were two TLABs, one of them partially refined.
+  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
+  // The BOT entries of the unrefined part of TLAB_2 point to the start
+  // of TLAB_2. If the last object of the TLAB_1 and the first object
+  // of TLAB_2 are coalesced, then the cards of the unrefined part
+  // would point into middle of the filler object.
+  // The current approach is to not coalesce and leave the BOT contents intact.
+  // </original comment>
+  //
+  // We now reset the BOT when we start the object iteration over the
+  // region and refine its entries for every object we come across. So
+  // the above comment is not really relevant and we should be able
+  // to coalesce dead objects if we want to.
+  void do_object(oop obj) {
+    HeapWord* obj_addr = (HeapWord*) obj;
+    assert(_hr->is_in(obj_addr), "sanity");
+    size_t obj_size = obj->size();
+    _hr->update_bot_for_object(obj_addr, obj_size);
+
+    if (obj->is_forwarded() && obj->forwardee() == obj) {
+      // The object failed to move.
+
+      // We consider all objects that we find self-forwarded to be
+      // live. What we'll do is that we'll update the prev marking
+      // info so that they are all under PTAMS and explicitly marked.
+      _cm->markPrev(obj);
+      if (_during_initial_mark) {
+        // For the next marking info we'll only mark the
+        // self-forwarded objects explicitly if we are during
+        // initial-mark (since, normally, we only mark objects pointed
+        // to by roots if we succeed in copying them). By marking all
+        // self-forwarded objects we ensure that we mark any that are
+        // still pointed to by roots. During concurrent marking, and
+        // after initial-mark, we don't need to mark any objects
+        // explicitly and all objects in the CSet are considered
+        // (implicitly) live. So, we won't mark them explicitly and
+        // we'll leave them over NTAMS.
+        _cm->grayRoot(obj, obj_size, _worker_id, _hr);
+      }
+      _marked_bytes += (obj_size * HeapWordSize);
+      obj->set_mark(markOopDesc::prototype());
+
+      // While we were processing RSet buffers during the collection,
+      // we actually didn't scan any cards on the collection set,
+      // since we didn't want to update remembered sets with entries
+      // that point into the collection set, given that live objects
+      // from the collection set are about to move and such entries
+      // will be stale very soon.
+      // This change also dealt with a reliability issue which
+      // involved scanning a card in the collection set and coming
+      // across an array that was being chunked and looking malformed.
+      // The problem is that, if evacuation fails, we might have
+      // remembered set entries missing given that we skipped cards on
+      // the collection set. So, we'll recreate such entries now.
+      obj->oop_iterate(_update_rset_cl);
+      assert(_cm->isPrevMarked(obj), "Should be marked!");
+    } else {
+      // The object has been either evacuated or is dead. Fill it with a
+      // dummy object.
+      MemRegion mr((HeapWord*) obj, obj_size);
+      CollectedHeap::fill_with_object(mr);
+    }
+  }
+};
+
+class RemoveSelfForwardPtrHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  OopsInHeapRegionClosure *_update_rset_cl;
+  uint _worker_id;
+
+public:
+  RemoveSelfForwardPtrHRClosure(G1CollectedHeap* g1h,
+                                OopsInHeapRegionClosure* update_rset_cl,
+                                uint worker_id) :
+    _g1h(g1h), _update_rset_cl(update_rset_cl),
+    _worker_id(worker_id), _cm(_g1h->concurrent_mark()) { }
+
+  bool doHeapRegion(HeapRegion *hr) {
+    bool during_initial_mark = _g1h->g1_policy()->during_initial_mark_pause();
+    bool during_conc_mark = _g1h->mark_in_progress();
+
+    assert(!hr->isHumongous(), "sanity");
+    assert(hr->in_collection_set(), "bad CS");
+
+    if (hr->claimHeapRegion(HeapRegion::ParEvacFailureClaimValue)) {
+      if (hr->evacuation_failed()) {
+        RemoveSelfForwardPtrObjClosure rspc(_g1h, _cm, hr, _update_rset_cl,
+                                            during_initial_mark,
+                                            during_conc_mark,
+                                            _worker_id);
+
+        MemRegion mr(hr->bottom(), hr->end());
+        // We'll recreate the prev marking info so we'll first clear
+        // the prev bitmap range for this region. We never mark any
+        // CSet objects explicitly so the next bitmap range should be
+        // cleared anyway.
+        _cm->clearRangePrevBitmap(mr);
+
+        hr->note_self_forwarding_removal_start(during_initial_mark,
+                                               during_conc_mark);
+
+        // In the common case (i.e. when there is no evacuation
+        // failure) we make sure that the following is done when
+        // the region is freed so that it is "ready-to-go" when it's
+        // re-allocated. However, when evacuation failure happens, a
+        // region will remain in the heap and might ultimately be added
+        // to a CSet in the future. So we have to be careful here and
+        // make sure the region's RSet is ready for parallel iteration
+        // whenever this might be required in the future.
+        hr->rem_set()->reset_for_par_iteration();
+        hr->reset_bot();
+        _update_rset_cl->set_region(hr);
+        hr->object_iterate(&rspc);
+
+        hr->note_self_forwarding_removal_end(during_initial_mark,
+                                             during_conc_mark,
+                                             rspc.marked_bytes());
+      }
+    }
+    return false;
+  }
+};
+
+class G1ParRemoveSelfForwardPtrsTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+
+public:
+  G1ParRemoveSelfForwardPtrsTask(G1CollectedHeap* g1h) :
+    AbstractGangTask("G1 Remove Self-forwarding Pointers"),
+    _g1h(g1h) { }
+
+  void work(uint worker_id) {
+    UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
+    DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
+    UpdateRSetDeferred deferred_update(_g1h, &dcq);
+
+    OopsInHeapRegionClosure *update_rset_cl = &deferred_update;
+    if (!G1DeferredRSUpdate) {
+      update_rset_cl = &immediate_update;
+    }
+
+    RemoveSelfForwardPtrHRClosure rsfp_cl(_g1h, update_rset_cl, worker_id);
+
+    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
+    _g1h->collection_set_iterate_from(hr, &rsfp_cl);
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -126,7 +126,6 @@
 void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
                                     bool clear_all_softrefs) {
   // Recursively traverse all live objects and mark them
-  EventMark m("1 mark object");
   TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace(" 1");
 
@@ -292,7 +291,6 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   Generation* pg = g1h->perm_gen();
 
-  EventMark m("2 compute new addresses");
   TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace("2");
 
@@ -337,7 +335,6 @@
   Generation* pg = g1h->perm_gen();
 
   // Adjust the pointers to reflect the new locations
-  EventMark m("3 adjust pointers");
   TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace("3");
 
@@ -402,7 +399,6 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   Generation* pg = g1h->perm_gen();
 
-  EventMark m("4 compact heap");
   TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty);
   GenMarkSweep::trace("4");
 
--- a/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1MonitoringSupport.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -89,16 +89,15 @@
 //
 // * Min Capacity
 //
-//    We set this to 0 for all spaces. We could consider setting the old
-//    min capacity to the min capacity of the heap (see 7078465).
+//    We set this to 0 for all spaces.
 //
 // * Max Capacity
 //
 //    For jstat, we set the max capacity of all spaces to heap_capacity,
-//    given that we don't always have a reasonably upper bound on how big
-//    each space can grow. For the memory pools, we actually make the max
-//    capacity undefined. We could consider setting the old max capacity
-//    to the max capacity of the heap (see 7078465).
+//    given that we don't always have a reasonable upper bound on how big
+//    each space can grow. For the memory pools, we make the max
+//    capacity undefined with the exception of the old memory pool for
+//    which we make the max capacity same as the max heap capacity.
 //
 // If we had more accurate occupancy / capacity information per
 // region set the above calculations would be greatly simplified and
@@ -119,7 +118,7 @@
   G1CollectedHeap* _g1h;
 
   // jstat performance counters
-  //  incremental collections both fully and partially young
+  //  incremental collections both young and mixed
   CollectorCounters*   _incremental_collection_counters;
   //  full stop-the-world collections
   CollectorCounters*   _full_collection_counters;
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,7 @@
   G1RemSet* _g1_rem;
   ConcurrentMark* _cm;
   G1ParScanThreadState* _par_scan_state;
+  uint _worker_id;
   bool _during_initial_mark;
   bool _mark_in_progress;
 public:
@@ -121,17 +122,25 @@
 class G1ParCopyHelper : public G1ParClosureSuper {
   G1ParScanClosure *_scanner;
 protected:
-  template <class T> void mark_object(T* p);
-  oop copy_to_survivor_space(oop obj, bool should_mark_root,
-                                      bool should_mark_copy);
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that are guaranteed not to move
+  // during the GC (i.e., non-CSet objects). It is MT-safe.
+  void mark_object(oop obj);
+
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that have been forwarded during a
+  // GC. It is MT-safe.
+  void mark_forwarded_object(oop from_obj, oop to_obj);
+
+  oop copy_to_survivor_space(oop obj);
+
 public:
   G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                   G1ParScanClosure *scanner) :
     G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
 };
 
-template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_object>
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
 
@@ -140,9 +149,8 @@
 public:
   G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                    ReferenceProcessor* rp) :
-    _scanner(g1, par_scan_state, rp),
-    G1ParCopyHelper(g1, par_scan_state, &_scanner)
-  {
+      _scanner(g1, par_scan_state, rp),
+      G1ParCopyHelper(g1, par_scan_state, &_scanner) {
     assert(_ref_processor == NULL, "sanity");
   }
 
@@ -212,6 +220,7 @@
 
 // Closure for iterating over object fields during concurrent marking
 class G1CMOopClosure : public OopClosure {
+private:
   G1CollectedHeap*   _g1h;
   ConcurrentMark*    _cm;
   CMTask*            _task;
@@ -222,4 +231,92 @@
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
+// Closure to scan the root regions during concurrent marking
+class G1RootRegionScanClosure : public OopClosure {
+private:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark*  _cm;
+  uint _worker_id;
+public:
+  G1RootRegionScanClosure(G1CollectedHeap* g1h, ConcurrentMark* cm,
+                          uint worker_id) :
+    _g1h(g1h), _cm(cm), _worker_id(worker_id) { }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(      oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+// Closure that applies the given two closures in sequence.
+// Used by the RSet refinement code (when updating RSets
+// during an evacuation pause) to record cards containing
+// pointers into the collection set.
+
+class G1Mux2Closure : public OopClosure {
+  OopClosure* _c1;
+  OopClosure* _c2;
+public:
+  G1Mux2Closure(OopClosure *c1, OopClosure *c2);
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+// A closure that records whether it has actually been applied
+// to a reference; triggered() returns true once it has.
+
+class G1TriggerClosure : public OopClosure {
+  bool _triggered;
+public:
+  G1TriggerClosure();
+  bool triggered() const { return _triggered; }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+// A closure which uses a triggering closure to determine
+// whether to apply an oop closure.
+
+class G1InvokeIfNotTriggeredClosure: public OopClosure {
+  G1TriggerClosure* _trigger_cl;
+  OopClosure* _oop_cl;
+public:
+  G1InvokeIfNotTriggeredClosure(G1TriggerClosure* t, OopClosure* oc);
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
+};
+
+class G1UpdateRSOrPushRefOopClosure: public OopClosure {
+  G1CollectedHeap* _g1;
+  G1RemSet* _g1_rem_set;
+  HeapRegion* _from;
+  OopsInHeapRegionClosure* _push_ref_cl;
+  bool _record_refs_into_cset;
+  int _worker_i;
+
+public:
+  G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
+                                G1RemSet* rs,
+                                OopsInHeapRegionClosure* push_ref_cl,
+                                bool record_refs_into_cset,
+                                int worker_i = 0);
+
+  void set_from(HeapRegion* from) {
+    assert(from != NULL, "from region must be non-NULL");
+    _from = from;
+  }
+
+  bool self_forwarded(oop obj) {
+    bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
+    return result;
+  }
+
+  bool apply_to_weak_ref_discovered_field() { return true; }
+
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
+};
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,8 @@
 // perf-critical inner loop.
 #define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
 
-template <class T> inline void FilterIntoCSClosure::do_oop_nv(T* p) {
+template <class T>
+inline void FilterIntoCSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop) &&
       _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
@@ -53,7 +54,8 @@
 
 #define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
 
-template <class T> inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
+template <class T>
+inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop)) {
     HeapWord* obj_hw = (HeapWord*)oopDesc::decode_heap_oop_not_null(heap_oop);
@@ -67,7 +69,8 @@
 }
 
 // This closure is applied to the fields of the objects that have just been copied.
-template <class T> inline void G1ParScanClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1ParScanClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
   if (!oopDesc::is_null(heap_oop)) {
@@ -96,7 +99,8 @@
   }
 }
 
-template <class T> inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
   if (!oopDesc::is_null(heap_oop)) {
@@ -111,7 +115,8 @@
   }
 }
 
-template <class T> inline void G1CMOopClosure::do_oop_nv(T* p) {
+template <class T>
+inline void G1CMOopClosure::do_oop_nv(T* p) {
   assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
   assert(!_g1h->is_on_master_free_list(
                     _g1h->heap_region_containing((HeapWord*) p)), "invariant");
@@ -125,4 +130,97 @@
   _task->deal_with_reference(obj);
 }
 
+template <class T>
+inline void G1RootRegionScanClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+    HeapRegion* hr = _g1h->heap_region_containing((HeapWord*) obj);
+    if (hr != NULL) {
+      _cm->grayRoot(obj, obj->size(), _worker_id, hr);
+    }
+  }
+}
+
+template <class T>
+inline void G1Mux2Closure::do_oop_nv(T* p) {
+  // Apply first closure; then apply the second.
+  _c1->do_oop(p);
+  _c2->do_oop(p);
+}
+
+template <class T>
+inline void G1TriggerClosure::do_oop_nv(T* p) {
+  // Record that this closure was actually applied (triggered).
+  _triggered = true;
+}
+
+template <class T>
+inline void G1InvokeIfNotTriggeredClosure::do_oop_nv(T* p) {
+  if (!_trigger_cl->triggered()) {
+    _oop_cl->do_oop(p);
+  }
+}
+
+template <class T>
+inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
+  oop obj = oopDesc::load_decode_heap_oop(p);
+#ifdef ASSERT
+  // can't do because of races
+  // assert(obj == NULL || obj->is_oop(), "expected an oop");
+
+  // Do the safe subset of is_oop
+  if (obj != NULL) {
+#ifdef CHECK_UNHANDLED_OOPS
+    oopDesc* o = obj.obj();
+#else
+    oopDesc* o = obj;
+#endif // CHECK_UNHANDLED_OOPS
+    assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
+    assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
+  }
+#endif // ASSERT
+
+  assert(_from != NULL, "from region must be non-NULL");
+
+  HeapRegion* to = _g1->heap_region_containing(obj);
+  if (to != NULL && _from != to) {
+    // The _record_refs_into_cset flag is true during the RSet
+    // updating part of an evacuation pause. It is false at all
+    // other times:
+    //  * rebuilding the remembered sets after a full GC
+    //  * during concurrent refinement.
+    //  * updating the remembered sets of regions in the collection
+    //    set in the event of an evacuation failure (when deferred
+    //    updates are enabled).
+
+    if (_record_refs_into_cset && to->in_collection_set()) {
+      // We are recording references that point into the collection
+      // set and this particular reference does exactly that...
+      // If the referenced object has already been forwarded
+      // to itself, we are handling an evacuation failure and
+      // we have already visited/tried to copy this object, so
+      // there is no need to retry.
+      if (!self_forwarded(obj)) {
+        assert(_push_ref_cl != NULL, "should not be null");
+        // Push the reference in the refs queue of the G1ParScanThreadState
+        // instance for this worker thread.
+        _push_ref_cl->do_oop(p);
+      }
+
+      // Deferred updates to the CSet are either discarded (in the normal case),
+      // or processed (if an evacuation failure occurs) at the end
+      // of the collection.
+      // See G1RemSet::cleanup_after_oops_into_collection_set_do().
+    } else {
+      // We either don't care about pushing references that point into the
+      // collection set (i.e. we're not during an evacuation pause) _or_
+      // the reference doesn't point into the collection set. Either way
+      // we add the reference directly to the RSet of the region containing
+      // the referenced object.
+      _g1_rem_set->par_write_ref(_from, p, _worker_i);
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -558,51 +558,37 @@
 }
 
 void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
-                                int worker_num, int claim_val) {
+                                uint worker_num, int claim_val) {
   ScrubRSClosure scrub_cl(region_bm, card_bm);
   _g1->heap_region_par_iterate_chunked(&scrub_cl,
                                        worker_num,
-                                       (int) n_workers(),
+                                       n_workers(),
                                        claim_val);
 }
 
 
 static IntHistogram out_of_histo(50, 50);
 
-class TriggerClosure : public OopClosure {
-  bool _trigger;
-public:
-  TriggerClosure() : _trigger(false) { }
-  bool value() const { return _trigger; }
-  template <class T> void do_oop_nv(T* p) { _trigger = true; }
-  virtual void do_oop(oop* p)        { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-};
+
+G1TriggerClosure::G1TriggerClosure() :
+  _triggered(false) { }  // A new trigger closure starts out untriggered.
+
+G1InvokeIfNotTriggeredClosure::G1InvokeIfNotTriggeredClosure(G1TriggerClosure* t_cl,
+                                                             OopClosure* oop_cl)  :
+  _trigger_cl(t_cl), _oop_cl(oop_cl) { }  // Only records the two pointers; nothing is invoked here.
 
-class InvokeIfNotTriggeredClosure: public OopClosure {
-  TriggerClosure* _t;
-  OopClosure* _oc;
-public:
-  InvokeIfNotTriggeredClosure(TriggerClosure* t, OopClosure* oc):
-    _t(t), _oc(oc) { }
-  template <class T> void do_oop_nv(T* p) {
-    if (!_t->value()) _oc->do_oop(p);
-  }
-  virtual void do_oop(oop* p)        { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-};
+G1Mux2Closure::G1Mux2Closure(OopClosure *c1, OopClosure *c2) :
+  _c1(c1), _c2(c2) { }  // Only records the two wrapped closures.
 
-class Mux2Closure : public OopClosure {
-  OopClosure* _c1;
-  OopClosure* _c2;
-public:
-  Mux2Closure(OopClosure *c1, OopClosure *c2) : _c1(c1), _c2(c2) { }
-  template <class T> void do_oop_nv(T* p) {
-    _c1->do_oop(p); _c2->do_oop(p);
-  }
-  virtual void do_oop(oop* p)        { do_oop_nv(p); }
-  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-};
+G1UpdateRSOrPushRefOopClosure::
+G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
+                              G1RemSet* rs,
+                              OopsInHeapRegionClosure* push_ref_cl,
+                              bool record_refs_into_cset,
+                              int worker_i) :
+  _g1(g1h), _g1_rem_set(rs), _from(NULL),  // _from starts NULL; callers set it via set_from()
+  _record_refs_into_cset(record_refs_into_cset),
+  _push_ref_cl(push_ref_cl), _worker_i(worker_i) { }
 
 bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
                                                    bool check_for_refs_into_cset) {
@@ -629,17 +615,17 @@
     assert((size_t)worker_i < n_workers(), "index of worker larger than _cset_rs_update_cl[].length");
     oops_in_heap_closure = _cset_rs_update_cl[worker_i];
   }
-  UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
-                                               _g1->g1_rem_set(),
-                                               oops_in_heap_closure,
-                                               check_for_refs_into_cset,
-                                               worker_i);
+  G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
+                                                 _g1->g1_rem_set(),
+                                                 oops_in_heap_closure,
+                                                 check_for_refs_into_cset,
+                                                 worker_i);
   update_rs_oop_cl.set_from(r);
 
-  TriggerClosure trigger_cl;
+  G1TriggerClosure trigger_cl;
   FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl);
-  InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
-  Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
+  G1InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
+  G1Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
 
   FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r,
                         (check_for_refs_into_cset ?
@@ -688,7 +674,7 @@
     _conc_refine_cards++;
   }
 
-  return trigger_cl.value();
+  return trigger_cl.triggered();
 }
 
 bool G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i,
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,7 @@
 protected:
   G1CollectedHeap* _g1;
   unsigned _conc_refine_cards;
-  size_t n_workers();
+  uint n_workers();
 
 protected:
   enum SomePrivateConstants {
@@ -122,7 +122,7 @@
   // parallel thread id of the current thread, and "claim_val" is the
   // value that should be used to claim heap regions.
   void scrub_par(BitMap* region_bm, BitMap* card_bm,
-                 int worker_num, int claim_val);
+                 uint worker_num, int claim_val);
 
   // Refine the card corresponding to "card_ptr".  If "sts" is non-NULL,
   // join and leave around parts that must be atomic wrt GC.  (NULL means
@@ -191,44 +191,5 @@
   virtual void do_oop(      oop* p) { do_oop_work(p); }
 };
 
-class UpdateRSOrPushRefOopClosure: public OopClosure {
-  G1CollectedHeap* _g1;
-  G1RemSet* _g1_rem_set;
-  HeapRegion* _from;
-  OopsInHeapRegionClosure* _push_ref_cl;
-  bool _record_refs_into_cset;
-  int _worker_i;
-
-  template <class T> void do_oop_work(T* p);
-
-public:
-  UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
-                              G1RemSet* rs,
-                              OopsInHeapRegionClosure* push_ref_cl,
-                              bool record_refs_into_cset,
-                              int worker_i = 0) :
-    _g1(g1h),
-    _g1_rem_set(rs),
-    _from(NULL),
-    _record_refs_into_cset(record_refs_into_cset),
-    _push_ref_cl(push_ref_cl),
-    _worker_i(worker_i) { }
-
-  void set_from(HeapRegion* from) {
-    assert(from != NULL, "from region must be non-NULL");
-    _from = from;
-  }
-
-  bool self_forwarded(oop obj) {
-    bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
-    return result;
-  }
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(oop* p)       { do_oop_work(p); }
-
-  bool apply_to_weak_ref_discovered_field() { return true; }
-};
-
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_HPP
--- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,7 +29,7 @@
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "oops/oop.inline.hpp"
 
-inline size_t G1RemSet::n_workers() {
+inline uint G1RemSet::n_workers() {
   if (_g1->workers() != NULL) {
     return _g1->workers()->total_workers();
   } else {
@@ -85,66 +85,4 @@
   }
 }
 
-template <class T>
-inline void UpdateRSOrPushRefOopClosure::do_oop_work(T* p) {
-  oop obj = oopDesc::load_decode_heap_oop(p);
-#ifdef ASSERT
-  // can't do because of races
-  // assert(obj == NULL || obj->is_oop(), "expected an oop");
-
-  // Do the safe subset of is_oop
-  if (obj != NULL) {
-#ifdef CHECK_UNHANDLED_OOPS
-    oopDesc* o = obj.obj();
-#else
-    oopDesc* o = obj;
-#endif // CHECK_UNHANDLED_OOPS
-    assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
-    assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
-  }
-#endif // ASSERT
-
-  assert(_from != NULL, "from region must be non-NULL");
-
-  HeapRegion* to = _g1->heap_region_containing(obj);
-  if (to != NULL && _from != to) {
-    // The _record_refs_into_cset flag is true during the RSet
-    // updating part of an evacuation pause. It is false at all
-    // other times:
-    //  * rebuilding the rembered sets after a full GC
-    //  * during concurrent refinement.
-    //  * updating the remembered sets of regions in the collection
-    //    set in the event of an evacuation failure (when deferred
-    //    updates are enabled).
-
-    if (_record_refs_into_cset && to->in_collection_set()) {
-      // We are recording references that point into the collection
-      // set and this particular reference does exactly that...
-      // If the referenced object has already been forwarded
-      // to itself, we are handling an evacuation failure and
-      // we have already visited/tried to copy this object
-      // there is no need to retry.
-      if (!self_forwarded(obj)) {
-        assert(_push_ref_cl != NULL, "should not be null");
-        // Push the reference in the refs queue of the G1ParScanThreadState
-        // instance for this worker thread.
-        _push_ref_cl->do_oop(p);
-      }
-
-      // Deferred updates to the CSet are either discarded (in the normal case),
-      // or processed (if an evacuation failure occurs) at the end
-      // of the collection.
-      // See G1RemSet::cleanup_after_oops_into_collection_set_do().
-    } else {
-      // We either don't care about pushing references that point into the
-      // collection set (i.e. we're not during an evacuation pause) _or_
-      // the reference doesn't point into the collection set. Either way
-      // we add the reference directly to the RSet of the region containing
-      // the referenced object.
-      _g1_rem_set->par_write_ref(_from, p, _worker_i);
-    }
-  }
-}
-
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -289,7 +289,31 @@
                                                                             \
   develop(uintx, G1ConcMarkForceOverflow, 0,                                \
           "The number of times we'll force an overflow during "             \
-          "concurrent marking")
+          "concurrent marking")                                             \
+                                                                            \
+  develop(uintx, G1DefaultMinNewGenPercent, 20,                             \
+          "Percentage (0-100) of the heap size to use as minimum "          \
+          "young gen size.")                                                \
+                                                                            \
+  develop(uintx, G1DefaultMaxNewGenPercent, 80,                             \
+          "Percentage (0-100) of the heap size to use as maximum "          \
+          "young gen size.")                                                \
+                                                                            \
+  develop(uintx, G1OldCSetRegionLiveThresholdPercent, 95,                   \
+          "Threshold for regions to be added to the collection set. "       \
+          "Regions with more live bytes than this will not be collected.")  \
+                                                                            \
+  develop(uintx, G1OldReclaimableThresholdPercent, 1,                       \
+          "Threshold for the remaining old reclaimable bytes, expressed "   \
+          "as a percentage of the heap size. If the old reclaimable bytes " \
+          "are under this we will not collect them with more mixed GCs.")   \
+                                                                            \
+  develop(uintx, G1MaxMixedGCNum, 4,                                        \
+          "The maximum desired number of mixed GCs after a marking cycle.") \
+                                                                            \
+  develop(uintx, G1OldCSetRegionThresholdPercent, 10,                       \
+          "An upper bound for the number of old CSet regions expressed "    \
+          "as a percentage of the heap size.")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
 
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,12 +32,14 @@
 
 // Forward declarations.
 enum G1Barrier {
-  G1BarrierNone, G1BarrierRS, G1BarrierEvac
+  G1BarrierNone,
+  G1BarrierRS,
+  G1BarrierEvac
 };
 
-template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_object>
+template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
 class G1ParCopyClosure;
+
 class G1ParScanClosure;
 class G1ParPushHeapRSClosure;
 
@@ -46,6 +48,13 @@
 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
 class G1CMOopClosure;
+class G1RootRegionScanClosure;
+
+// Specialized oop closures from g1RemSet.cpp
+class G1Mux2Closure;
+class G1TriggerClosure;
+class G1InvokeIfNotTriggeredClosure;
+class G1UpdateRSOrPushRefOopClosure;
 
 #ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
 #error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined."
@@ -57,7 +66,12 @@
       f(G1ParPushHeapRSClosure,_nv)                     \
       f(FilterIntoCSClosure,_nv)                        \
       f(FilterOutOfRegionClosure,_nv)                   \
-      f(G1CMOopClosure,_nv)
+      f(G1CMOopClosure,_nv)                             \
+      f(G1RootRegionScanClosure,_nv)                    \
+      f(G1Mux2Closure,_nv)                              \
+      f(G1TriggerClosure,_nv)                           \
+      f(G1InvokeIfNotTriggeredClosure,_nv)              \
+      f(G1UpdateRSOrPushRefOopClosure,_nv)
 
 #ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
 #error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined."
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -94,7 +94,8 @@
 #endif // PRODUCT
   }
 
-  template <class T> void do_oop_work(T* p) {
+  template <class T>
+  void do_oop_work(T* p) {
     assert(_containing_obj != NULL, "Precondition");
     assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo),
            "Precondition");
@@ -102,8 +103,10 @@
     if (!oopDesc::is_null(heap_oop)) {
       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
       bool failed = false;
-      if (!_g1h->is_in_closed_subset(obj) ||
-          _g1h->is_obj_dead_cond(obj, _vo)) {
+      if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead_cond(obj, _vo)) {
+        MutexLockerEx x(ParGCRareEvent_lock,
+                        Mutex::_no_safepoint_check_flag);
+
         if (!_failures) {
           gclog_or_tty->print_cr("");
           gclog_or_tty->print_cr("----------");
@@ -133,6 +136,7 @@
           print_object(gclog_or_tty, obj);
         }
         gclog_or_tty->print_cr("----------");
+        gclog_or_tty->flush();
         _failures = true;
         failed = true;
         _n_failures++;
@@ -155,6 +159,9 @@
                                   cv_field == dirty
                                : cv_obj == dirty || cv_field == dirty));
           if (is_bad) {
+            MutexLockerEx x(ParGCRareEvent_lock,
+                            Mutex::_no_safepoint_check_flag);
+
             if (!_failures) {
               gclog_or_tty->print_cr("");
               gclog_or_tty->print_cr("----------");
@@ -174,6 +181,7 @@
             gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
                           cv_obj, cv_field);
             gclog_or_tty->print_cr("----------");
+            gclog_or_tty->flush();
             _failures = true;
             if (!failed) _n_failures++;
           }
@@ -379,13 +387,12 @@
   ct_bs->clear(MemRegion(bottom(), end()));
 }
 
-// <PREDICTION>
 void HeapRegion::calc_gc_efficiency() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  _gc_efficiency = (double) garbage_bytes() /
-                            g1h->predict_region_elapsed_time_ms(this, false);
+  G1CollectorPolicy* g1p = g1h->g1_policy();
+  _gc_efficiency = (double) reclaimable_bytes() /
+                            g1p->predict_region_elapsed_time_ms(this, false);
 }
-// </PREDICTION>
 
 void HeapRegion::set_startsHumongous(HeapWord* new_top, HeapWord* new_end) {
   assert(!isHumongous(), "sanity / pre-condition");
@@ -567,6 +574,40 @@
   oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
 }
 
+void HeapRegion::note_self_forwarding_removal_start(bool during_initial_mark,
+                                                    bool during_conc_mark) {
+  // We always recreate the prev marking info and we'll explicitly
+  // mark all objects we find to be self-forwarded on the prev
+  // bitmap. So all objects need to be below PTAMS (_prev_top_at_mark_start).
+  _prev_top_at_mark_start = top();
+  _prev_marked_bytes = 0;  // the final value is stored by note_self_forwarding_removal_end()
+
+  if (during_initial_mark) {
+    // During initial-mark, we'll also explicitly mark all objects
+    // we find to be self-forwarded on the next bitmap. So all
+    // objects need to be below NTAMS (_next_top_at_mark_start).
+    _next_top_at_mark_start = top();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  } else if (during_conc_mark) {
+    // During concurrent mark, all objects in the CSet (including
+    // the ones we find to be self-forwarded) are implicitly live.
+    // So all objects need to be above NTAMS.
+    _next_top_at_mark_start = bottom();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  }  // Neither flag set: the next marking info is left untouched.
+}
+
+void HeapRegion::note_self_forwarding_removal_end(bool during_initial_mark,  // not read in this body
+                                                  bool during_conc_mark,     // not read in this body
+                                                  size_t marked_bytes) {
+  assert(marked_bytes <= used(),
+         err_msg("marked: " SIZE_FORMAT " used: " SIZE_FORMAT,
+                 marked_bytes, used()));
+  _prev_marked_bytes = marked_bytes;  // record the caller-supplied count as the prev marked bytes
+}
+
 HeapWord*
 HeapRegion::object_iterate_mem_careful(MemRegion mr,
                                                  ObjectClosure* cl) {
@@ -617,7 +658,7 @@
   // If we're within a stop-world GC, then we might look at a card in a
   // GC alloc region that extends onto a GC LAB, which may not be
   // parseable.  Stop such at the "saved_mark" of the region.
-  if (G1CollectedHeap::heap()->is_gc_active()) {
+  if (g1h->is_gc_active()) {
     mr = mr.intersection(used_region_at_save_marks());
   } else {
     mr = mr.intersection(used_region());
@@ -646,53 +687,63 @@
     OrderAccess::storeload();
   }
 
+  // Cache the boundaries of the memory region in some const locals
+  HeapWord* const start = mr.start();
+  HeapWord* const end = mr.end();
+
   // We used to use "block_start_careful" here.  But we're actually happy
   // to update the BOT while we do this...
-  HeapWord* cur = block_start(mr.start());
-  assert(cur <= mr.start(), "Postcondition");
+  HeapWord* cur = block_start(start);
+  assert(cur <= start, "Postcondition");
+
+  oop obj;
 
-  while (cur <= mr.start()) {
-    if (oop(cur)->klass_or_null() == NULL) {
+  HeapWord* next = cur;
+  while (next <= start) {
+    cur = next;
+    obj = oop(cur);
+    if (obj->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
     }
     // Otherwise...
-    int sz = oop(cur)->size();
-    if (cur + sz > mr.start()) break;
-    // Otherwise, go on.
-    cur = cur + sz;
+    next = (cur + obj->size());
   }
-  oop obj;
-  obj = oop(cur);
-  // If we finish this loop...
-  assert(cur <= mr.start()
-         && obj->klass_or_null() != NULL
-         && cur + obj->size() > mr.start(),
+
+  // If we finish the above loop...We have a parseable object that
+  // begins on or before the start of the memory region, and ends
+  // inside or spans the entire region.
+
+  assert(obj == oop(cur), "sanity");
+  assert(cur <= start &&
+         obj->klass_or_null() != NULL &&
+         (cur + obj->size()) > start,
          "Loop postcondition");
+
   if (!g1h->is_obj_dead(obj)) {
     obj->oop_iterate(cl, mr);
   }
 
-  HeapWord* next;
-  while (cur < mr.end()) {
+  while (cur < end) {
     obj = oop(cur);
     if (obj->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
     };
+
     // Otherwise:
     next = (cur + obj->size());
+
     if (!g1h->is_obj_dead(obj)) {
-      if (next < mr.end()) {
+      if (next < end || !obj->is_objArray()) {
+        // This object either does not span the MemRegion
+        // boundary, or if it does it's not an array.
+        // Apply closure to whole object.
         obj->oop_iterate(cl);
       } else {
-        // this obj spans the boundary.  If it's an array, stop at the
-        // boundary.
-        if (obj->is_objArray()) {
-          obj->oop_iterate(cl, mr);
-        } else {
-          obj->oop_iterate(cl);
-        }
+        // This obj is an array that spans the boundary.
+        // Stop at the boundary.
+        obj->oop_iterate(cl, mr);
       }
     }
     cur = next;
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -373,7 +373,10 @@
     ScrubRemSetClaimValue      = 3,
     ParVerifyClaimValue        = 4,
     RebuildRSClaimValue        = 5,
-    CompleteMarkCSetClaimValue = 6
+    CompleteMarkCSetClaimValue = 6,
+    ParEvacFailureClaimValue   = 7,
+    AggregateCountClaimValue   = 8,
+    VerifyCountClaimValue      = 9
   };
 
   inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
@@ -412,6 +415,16 @@
     return used_at_mark_start_bytes - marked_bytes();
   }
 
+  // Return the amount of bytes we'll reclaim if we collect this
+  // region. This includes not only the known garbage bytes in the
+  // region but also any unallocated space in it, i.e., [top, end),
+  // since it will also be reclaimed if we collect the region.
+  size_t reclaimable_bytes() {
+    size_t known_live_bytes = live_bytes();
+    assert(known_live_bytes <= capacity(), "sanity");  // guards the unsigned subtraction below against wrap-around
+    return capacity() - known_live_bytes;
+  }
+
   // An upper bound on the number of live bytes in the region.
   size_t max_live_bytes() { return used() - garbage_bytes(); }
 
@@ -582,37 +595,33 @@
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
 
-  // Note the start of a marking phase. Record the
-  // start of the unmarked area of the region here.
-  void note_start_of_marking(bool during_initial_mark) {
-    init_top_at_conc_mark_count();
-    _next_marked_bytes = 0;
-    if (during_initial_mark && is_young() && !is_survivor())
-      _next_top_at_mark_start = bottom();
-    else
-      _next_top_at_mark_start = top();
-  }
+  // Notify the region that concurrent marking is starting. Initialize
+  // all fields related to the next marking info.
+  inline void note_start_of_marking();
+
+  // Notify the region that concurrent marking has finished. Copy the
+  // (now finalized) next marking info fields into the prev marking
+  // info fields.
+  inline void note_end_of_marking();
+
+  // Notify the region that it will be used as to-space during a GC
+  // and we are about to start copying objects into it.
+  inline void note_start_of_copying(bool during_initial_mark);
 
-  // Note the end of a marking phase. Install the start of
-  // the unmarked area that was captured at start of marking.
-  void note_end_of_marking() {
-    _prev_top_at_mark_start = _next_top_at_mark_start;
-    _prev_marked_bytes = _next_marked_bytes;
-    _next_marked_bytes = 0;
+  // Notify the region that it ceases being to-space during a GC and
+  // we will not copy objects into it any more.
+  inline void note_end_of_copying(bool during_initial_mark);
 
-    guarantee(_prev_marked_bytes <=
-              (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize,
-              "invariant");
-  }
+  // Notify the region that we are about to start processing
+  // self-forwarded objects during evac failure handling.
+  void note_self_forwarding_removal_start(bool during_initial_mark,
+                                          bool during_conc_mark);
 
-  // After an evacuation, we need to update _next_top_at_mark_start
-  // to be the current top.  Note this is only valid if we have only
-  // ever evacuated into this region.  If we evacuate, allocate, and
-  // then evacuate we are in deep doodoo.
-  void note_end_of_copying() {
-    assert(top() >= _next_top_at_mark_start, "Increase only");
-    _next_top_at_mark_start = top();
-  }
+  // Notify the region that we have finished processing self-forwarded
+  // objects during evac failure handling.
+  void note_self_forwarding_removal_end(bool during_initial_mark,
+                                        bool during_conc_mark,
+                                        size_t marked_bytes);
 
   // Returns "false" iff no object in the region was allocated when the
   // last mark phase ended.
@@ -649,10 +658,8 @@
     init_top_at_mark_start();
   }
 
-  // <PREDICTION>
   void calc_gc_efficiency(void);
   double gc_efficiency() { return _gc_efficiency;}
-  // </PREDICTION>
 
   bool is_young() const     { return _young_type != NotYoung; }
   bool is_survivor() const  { return _young_type == Survivor; }
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,4 +55,63 @@
   return _offsets.block_start_const(p);
 }
 
+inline void HeapRegion::note_start_of_marking() {  // Resets this region's "next" marking info.
+  init_top_at_conc_mark_count();
+  _next_marked_bytes = 0;
+  _next_top_at_mark_start = top();  // NTAMS starts at the region's current top
+}
+
+inline void HeapRegion::note_end_of_marking() {  // Copies the "next" marking info into "prev".
+  _prev_top_at_mark_start = _next_top_at_mark_start;
+  _prev_marked_bytes = _next_marked_bytes;
+  _next_marked_bytes = 0;
+  // The marked bytes must fit within [bottom, prev top-at-mark-start).
+  assert(_prev_marked_bytes <=
+         (size_t) pointer_delta(prev_top_at_mark_start(), bottom()) *
+         HeapWordSize, "invariant");
+}
+
+inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
+  if (is_survivor()) {
+    // This is how we always allocate survivors.
+    assert(_next_top_at_mark_start == bottom(), "invariant");
+  } else {
+    if (during_initial_mark) {
+      // During initial-mark we'll explicitly mark any objects on old
+      // regions that are pointed to by roots. Given that explicit
+      // marks only make sense under NTAMS it'd be nice if we could
+      // check that condition if we wanted to. Given that we don't
+      // know where the top of this region will end up, we simply set
+      // NTAMS to the end of the region so all marks will be below
+      // NTAMS. We'll set it to the actual top when we retire this region.
+      _next_top_at_mark_start = end();  // provisional; note_end_of_copying() resets it to top()
+    } else {
+      // We could have re-used this old region as to-space over a
+      // couple of GCs since the start of the concurrent marking
+      // cycle. This means that [bottom,NTAMS) will contain objects
+      // copied up to and including initial-mark and [NTAMS, top)
+      // will contain objects copied during the concurrent marking cycle.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
+inline void HeapRegion::note_end_of_copying(bool during_initial_mark) {
+  if (is_survivor()) {
+    // This is how we always allocate survivors.
+    assert(_next_top_at_mark_start == bottom(), "invariant");
+  } else {
+    if (during_initial_mark) {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(_next_top_at_mark_start == end(), "pre-condition");
+      _next_top_at_mark_start = top();  // replace the provisional end() value with the actual top
+    } else {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -59,6 +59,7 @@
 class HeapRegionSetBase VALUE_OBJ_CLASS_SPEC {
   friend class hrs_ext_msg;
   friend class HRSPhaseSetter;
+  friend class VMStructs;
 
 protected:
   static size_t calculate_region_num(HeapRegion* hr);
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,7 +70,7 @@
   // given PtrQueueSet.
   PtrQueue(PtrQueueSet* qset, bool perm = false, bool active = false);
   // Release any contained resources.
-  void flush();
+  virtual void flush();
   // Calls flush() when destroyed.
   ~PtrQueue() { flush(); }
 
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,14 @@
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 
+void ObjPtrQueue::flush() {
+  // The buffer might contain refs into the CSet. We have to filter it
+  // first before we flush it, otherwise we might end up with an
+  // enqueued buffer with refs into the CSet which breaks our invariants.
+  filter();
+  PtrQueue::flush();
+}
+
 // This method removes entries from an SATB buffer that will not be
 // useful to the concurrent marking threads. An entry is removed if it
 // satisfies one of the following conditions:
@@ -44,38 +52,27 @@
 //     process it again).
 //
 // The rest of the entries will be retained and are compacted towards
-// the top of the buffer. If with this filtering we clear a large
-// enough chunk of the buffer we can re-use it (instead of enqueueing
-// it) and we can just allow the mutator to carry on executing.
-
-bool ObjPtrQueue::should_enqueue_buffer() {
-  assert(_lock == NULL || _lock->owned_by_self(),
-         "we should have taken the lock before calling this");
+// the top of the buffer. Note that, because we do not allow old
+// regions in the CSet during marking, all objects on the CSet regions
+// are young (eden or survivors) and therefore implicitly live. So any
+// references into the CSet will be removed during filtering.
 
-  // A value of 0 means "don't filter SATB buffers".
-  if (G1SATBBufferEnqueueingThresholdPercent == 0) {
-    return true;
-  }
-
+void ObjPtrQueue::filter() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  // This method should only be called if there is a non-NULL buffer
-  // that is full.
-  assert(_index == 0, "pre-condition");
-  assert(_buf != NULL, "pre-condition");
-
   void** buf = _buf;
   size_t sz = _sz;
 
+  if (buf == NULL) {
+    // nothing to do
+    return;
+  }
+
   // Used for sanity checking at the end of the loop.
   debug_only(size_t entries = 0; size_t retained = 0;)
 
   size_t i = sz;
   size_t new_index = sz;
 
-  // Given that we are expecting _index == 0, we could have changed
-  // the loop condition to (i > 0). But we are using _index for
-  // generality.
   while (i > _index) {
     assert(i > 0, "we should have at least one more entry to process");
     i -= oopSize;
@@ -103,22 +100,58 @@
       debug_only(retained += 1;)
     }
   }
+
+#ifdef ASSERT
   size_t entries_calc = (sz - _index) / oopSize;
   assert(entries == entries_calc, "the number of entries we counted "
          "should match the number of entries we calculated");
   size_t retained_calc = (sz - new_index) / oopSize;
   assert(retained == retained_calc, "the number of retained entries we counted "
          "should match the number of retained entries we calculated");
-  size_t perc = retained_calc * 100 / entries_calc;
+#endif // ASSERT
+
+  _index = new_index;
+}
+
+// This method will first apply the above filtering to the buffer. If
+// post-filtering a large enough chunk of the buffer has been cleared
+// we can re-use the buffer (instead of enqueueing it) and we can just
+// allow the mutator to carry on executing using the same buffer
+// instead of replacing it.
+
+bool ObjPtrQueue::should_enqueue_buffer() {
+  assert(_lock == NULL || _lock->owned_by_self(),
+         "we should have taken the lock before calling this");
+
+  // Even if G1SATBBufferEnqueueingThresholdPercent == 0 we have to
+  // filter the buffer given that this will remove any references into
+  // the CSet as we currently assume that no such refs will appear in
+  // enqueued buffers.
+
+  // This method should only be called if there is a non-NULL buffer
+  // that is full.
+  assert(_index == 0, "pre-condition");
+  assert(_buf != NULL, "pre-condition");
+
+  filter();
+
+  size_t sz = _sz;
+  size_t all_entries = sz / oopSize;
+  size_t retained_entries = (sz - _index) / oopSize;
+  size_t perc = retained_entries * 100 / all_entries;
   bool should_enqueue = perc > (size_t) G1SATBBufferEnqueueingThresholdPercent;
-  _index = new_index;
-
   return should_enqueue;
 }
 
 void ObjPtrQueue::apply_closure(ObjectClosure* cl) {
   if (_buf != NULL) {
     apply_closure_to_buffer(cl, _buf, _index, _sz);
+  }
+}
+
+void ObjPtrQueue::apply_closure_and_empty(ObjectClosure* cl) {
+  if (_buf != NULL) {
+    apply_closure_to_buffer(cl, _buf, _index, _sz);
     _index = _sz;
   }
 }
@@ -135,6 +168,21 @@
   }
 }
 
+#ifndef PRODUCT
+// Helpful for debugging
+
+void ObjPtrQueue::print(const char* name) {
+  print(name, _buf, _index, _sz);
+}
+
+void ObjPtrQueue::print(const char* name,
+                        void** buf, size_t index, size_t sz) {
+  gclog_or_tty->print_cr("  SATB BUFFER [%s] buf: "PTR_FORMAT" "
+                         "index: "SIZE_FORMAT" sz: "SIZE_FORMAT,
+                         name, buf, index, sz);
+}
+#endif // PRODUCT
+
 #ifdef ASSERT
 void ObjPtrQueue::verify_oops_in_buffer() {
   if (_buf == NULL) return;
@@ -150,12 +198,9 @@
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
-
 SATBMarkQueueSet::SATBMarkQueueSet() :
-  PtrQueueSet(),
-  _closure(NULL), _par_closures(NULL),
-  _shared_satb_queue(this, true /*perm*/)
-{}
+  PtrQueueSet(), _closure(NULL), _par_closures(NULL),
+  _shared_satb_queue(this, true /*perm*/) { }
 
 void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                   int process_completed_threshold,
@@ -167,7 +212,6 @@
   }
 }
 
-
 void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
   DEBUG_ONLY(t->satb_mark_queue().verify_oops_in_buffer();)
   t->satb_mark_queue().handle_zero_index();
@@ -228,6 +272,13 @@
   }
 }
 
+void SATBMarkQueueSet::filter_thread_buffers() {
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().filter();
+  }
+  shared_satb_queue()->filter();
+}
+
 void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
   _closure = closure;
 }
@@ -239,9 +290,9 @@
 
 void SATBMarkQueueSet::iterate_closure_all_threads() {
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    t->satb_mark_queue().apply_closure(_closure);
+    t->satb_mark_queue().apply_closure_and_empty(_closure);
   }
-  shared_satb_queue()->apply_closure(_closure);
+  shared_satb_queue()->apply_closure_and_empty(_closure);
 }
 
 void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
@@ -250,7 +301,7 @@
 
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
     if (t->claim_oops_do(true, parity)) {
-      t->satb_mark_queue().apply_closure(_par_closures[worker]);
+      t->satb_mark_queue().apply_closure_and_empty(_par_closures[worker]);
     }
   }
 
@@ -264,7 +315,7 @@
 
   VMThread* vmt = VMThread::vm_thread();
   if (vmt->claim_oops_do(true, parity)) {
-    shared_satb_queue()->apply_closure(_par_closures[worker]);
+    shared_satb_queue()->apply_closure_and_empty(_par_closures[worker]);
   }
 }
 
@@ -292,6 +343,61 @@
   }
 }
 
+void SATBMarkQueueSet::iterate_completed_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  BufferNode* nd = _completed_buffers_head;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
+    nd = nd->next();
+  }
+}
+
+void SATBMarkQueueSet::iterate_thread_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().apply_closure(cl);
+  }
+  shared_satb_queue()->apply_closure(cl);
+}
+
+#ifndef PRODUCT
+// Helpful for debugging
+
+#define SATB_PRINTER_BUFFER_SIZE 256
+
+void SATBMarkQueueSet::print_all(const char* msg) {
+  char buffer[SATB_PRINTER_BUFFER_SIZE];
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+
+  gclog_or_tty->cr();
+  gclog_or_tty->print_cr("SATB BUFFERS [%s]", msg);
+
+  BufferNode* nd = _completed_buffers_head;
+  int i = 0;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Enqueued: %d", i);
+    ObjPtrQueue::print(buffer, buf, 0, _sz);
+    nd = nd->next();
+    i += 1;
+  }
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Thread: %s", t->name());
+    t->satb_mark_queue().print(buffer);
+  }
+
+  shared_satb_queue()->print("Shared");
+
+  gclog_or_tty->cr();
+}
+#endif // PRODUCT
+
 void SATBMarkQueueSet::abandon_partial_marking() {
   BufferNode* buffers_to_delete = NULL;
   {
@@ -316,5 +422,5 @@
   for (JavaThread* t = Threads::first(); t; t = t->next()) {
     t->satb_mark_queue().reset();
   }
-  shared_satb_queue()->reset();
+ shared_satb_queue()->reset();
 }
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,9 +29,26 @@
 
 class ObjectClosure;
 class JavaThread;
+class SATBMarkQueueSet;
 
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
+  friend class SATBMarkQueueSet;
+
+private:
+  // Filter out unwanted entries from the buffer.
+  void filter();
+
+  // Apply the closure to all elements.
+  void apply_closure(ObjectClosure* cl);
+
+  // Apply the closure to all elements and empty the buffer.
+  void apply_closure_and_empty(ObjectClosure* cl);
+
+  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
+  static void apply_closure_to_buffer(ObjectClosure* cl,
+                                      void** buf, size_t index, size_t sz);
+
 public:
   ObjPtrQueue(PtrQueueSet* qset, bool perm = false) :
     // SATB queues are only active during marking cycles. We create
@@ -41,23 +58,23 @@
     // field to true. This is done in JavaThread::initialize_queues().
     PtrQueue(qset, perm, false /* active */) { }
 
+  // Overrides PtrQueue::flush() so that it can filter the buffer
+  // before it is flushed.
+  virtual void flush();
+
   // Overrides PtrQueue::should_enqueue_buffer(). See the method's
   // definition for more information.
   virtual bool should_enqueue_buffer();
 
-  // Apply the closure to all elements, and reset the index to make the
-  // buffer empty.
-  void apply_closure(ObjectClosure* cl);
-
-  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
-  static void apply_closure_to_buffer(ObjectClosure* cl,
-                                      void** buf, size_t index, size_t sz);
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print(const char* name);
+  static void print(const char* name, void** buf, size_t index, size_t sz);
+#endif // PRODUCT
 
   void verify_oops_in_buffer() NOT_DEBUG_RETURN;
 };
 
-
-
 class SATBMarkQueueSet: public PtrQueueSet {
   ObjectClosure* _closure;
   ObjectClosure** _par_closures;  // One per ParGCThread.
@@ -88,6 +105,9 @@
   // set itself, has an active value same as expected_active.
   void set_active_all_threads(bool b, bool expected_active);
 
+  // Filter all the currently-active SATB buffers.
+  void filter_thread_buffers();
+
   // Register "blk" as "the closure" for all queues.  Only one such closure
   // is allowed.  The "apply_closure_to_completed_buffer" method will apply
   // this closure to a completed buffer, and "iterate_closure_all_threads"
@@ -98,10 +118,9 @@
   // closures, one for each parallel GC thread.
   void set_par_closure(int i, ObjectClosure* closure);
 
-  // If there is a registered closure for buffers, apply it to all entries
-  // in all currently-active buffers.  This should only be applied at a
-  // safepoint.  (Currently must not be called in parallel; this should
-  // change in the future.)
+  // Apply the registered closure to all entries on each
+  // currently-active buffer and then empty the buffer. It should only
+  // be called serially and at a safepoint.
   void iterate_closure_all_threads();
   // Parallel version of the above.
   void par_iterate_closure_all_threads(int worker);
@@ -117,11 +136,21 @@
     return apply_closure_to_completed_buffer_work(true, worker);
   }
 
+  // Apply the given closure on enqueued and currently-active buffers
+  // respectively. Both methods are read-only, i.e., they do not
+  // modify any of the buffers.
+  void iterate_completed_buffers_read_only(ObjectClosure* cl);
+  void iterate_thread_buffers_read_only(ObjectClosure* cl);
+
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print_all(const char* msg);
+#endif // PRODUCT
+
   ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; }
 
   // If a marking is being abandoned, reset any unprocessed log buffers.
   void abandon_partial_marking();
-
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP
--- a/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,6 +40,8 @@
   nonstatic_field(G1CollectedHeap, _g1_committed,       MemRegion)            \
   nonstatic_field(G1CollectedHeap, _summary_bytes_used, size_t)               \
   nonstatic_field(G1CollectedHeap, _g1mm,               G1MonitoringSupport*) \
+  nonstatic_field(G1CollectedHeap, _old_set,            HeapRegionSetBase)    \
+  nonstatic_field(G1CollectedHeap, _humongous_set,      HeapRegionSetBase)    \
                                                                               \
   nonstatic_field(G1MonitoringSupport, _eden_committed,     size_t)           \
   nonstatic_field(G1MonitoringSupport, _eden_used,          size_t)           \
@@ -47,6 +49,10 @@
   nonstatic_field(G1MonitoringSupport, _survivor_used,      size_t)           \
   nonstatic_field(G1MonitoringSupport, _old_committed,      size_t)           \
   nonstatic_field(G1MonitoringSupport, _old_used,           size_t)           \
+                                                                              \
+  nonstatic_field(HeapRegionSetBase,   _length,             size_t)           \
+  nonstatic_field(HeapRegionSetBase,   _region_num,         size_t)           \
+  nonstatic_field(HeapRegionSetBase,   _total_used_bytes,   size_t)           \
 
 
 #define VM_TYPES_G1(declare_type, declare_toplevel_type)                      \
@@ -55,6 +61,7 @@
                                                                               \
   declare_type(HeapRegion, ContiguousSpace)                                   \
   declare_toplevel_type(HeapRegionSeq)                                        \
+  declare_toplevel_type(HeapRegionSetBase)                                    \
   declare_toplevel_type(G1MonitoringSupport)                                  \
                                                                               \
   declare_toplevel_type(G1CollectedHeap*)                                     \
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,8 +74,9 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   assert(!_should_initiate_conc_mark ||
   ((_gc_cause == GCCause::_gc_locker && GCLockerInvokesConcurrent) ||
-   (_gc_cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent)),
-         "only a GC locker or a System.gc() induced GC should start a cycle");
+   (_gc_cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent) ||
+    _gc_cause == GCCause::_g1_humongous_allocation),
+         "only a GC locker, a System.gc() or a hum allocation induced GC should start a cycle");
 
   if (_word_size > 0) {
     // An allocation has been requested. So, try to do that first.
--- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -56,14 +56,14 @@
                           lowest_non_clean_base_chunk_index,
                           lowest_non_clean_chunk_size);
 
-  int n_strides = n_threads * ParGCStridesPerThread;
+  uint n_strides = n_threads * ParGCStridesPerThread;
   SequentialSubTasksDone* pst = sp->par_seq_tasks();
   // Sets the condition for completion of the subtask (how many threads
   // need to finish in order to be done).
   pst->set_n_threads(n_threads);
   pst->set_n_tasks(n_strides);
 
-  int stride = 0;
+  uint stride = 0;
   while (!pst->is_task_claimed(/* reference */ stride)) {
     process_stride(sp, mr, stride, n_strides, cl, ct,
                    lowest_non_clean,
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -590,7 +590,7 @@
 // called after a task is started.  So "i" is based on
 // first-come-first-served.
 
-void ParNewGenTask::work(int i) {
+void ParNewGenTask::work(uint worker_id) {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   // Since this is being done in a separate thread, need new resource
   // and handle marks.
@@ -601,8 +601,8 @@
 
   Generation* old_gen = gch->next_gen(_gen);
 
-  ParScanThreadState& par_scan_state = _state_set->thread_state(i);
-  assert(_state_set->is_valid(i), "Should not have been called");
+  ParScanThreadState& par_scan_state = _state_set->thread_state(worker_id);
+  assert(_state_set->is_valid(worker_id), "Should not have been called");
 
   par_scan_state.set_young_old_boundary(_young_old_boundary);
 
@@ -755,7 +755,7 @@
                          ParScanThreadStateSet& state_set);
 
 private:
-  virtual void work(int i);
+  virtual void work(uint worker_id);
   virtual void set_for_termination(int active_workers) {
     _state_set.terminator()->reset_for_reuse(active_workers);
   }
@@ -781,13 +781,13 @@
 {
 }
 
-void ParNewRefProcTaskProxy::work(int i)
+void ParNewRefProcTaskProxy::work(uint worker_id)
 {
   ResourceMark rm;
   HandleMark hm;
-  ParScanThreadState& par_scan_state = _state_set.thread_state(i);
+  ParScanThreadState& par_scan_state = _state_set.thread_state(worker_id);
   par_scan_state.set_young_old_boundary(_young_old_boundary);
-  _task.work(i, par_scan_state.is_alive_closure(),
+  _task.work(worker_id, par_scan_state.is_alive_closure(),
              par_scan_state.keep_alive_closure(),
              par_scan_state.evacuate_followers_closure());
 }
@@ -802,9 +802,9 @@
       _task(task)
   { }
 
-  virtual void work(int i)
+  virtual void work(uint worker_id)
   {
-    _task.work(i);
+    _task.work(worker_id);
   }
 };
 
@@ -1042,7 +1042,11 @@
     size_policy->avg_survived()->sample(from()->used());
   }
 
-  update_time_of_last_gc(os::javaTimeMillis());
+  // We need to use a monotonically non-decreasing time in ms
+  // or we will see time-warp warnings and os::javaTimeMillis()
+  // does not guarantee monotonicity.
+  jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
+  update_time_of_last_gc(now);
 
   SpecializationStats::print();
 
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -239,7 +239,7 @@
 
   HeapWord* young_old_boundary() { return _young_old_boundary; }
 
-  void work(int i);
+  void work(uint worker_id);
 
   // Reset the terminator in ParScanThreadStateSet for
   // "active_workers" threads.
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -418,25 +418,17 @@
       gc_count = Universe::heap()->total_collections();
 
       result = young_gen()->allocate(size);
-
-      // (1) If the requested object is too large to easily fit in the
-      //     young_gen, or
-      // (2) If GC is locked out via GCLocker, young gen is full and
-      //     the need for a GC already signalled to GCLocker (done
-      //     at a safepoint),
-      // ... then, rather than force a safepoint and (a potentially futile)
-      // collection (attempt) for each allocation, try allocation directly
-      // in old_gen. For case (2) above, we may in the future allow
-      // TLAB allocation directly in the old gen.
       if (result != NULL) {
         return result;
       }
-      if (size >= (young_gen()->eden_space()->capacity_in_words(Thread::current()) / 2)) {
-        result = old_gen()->allocate(size);
-        if (result != NULL) {
-          return result;
-        }
+
+      // If certain conditions hold, try allocating from the old gen.
+      result = mem_allocate_old_gen(size);
+      if (result != NULL) {
+        return result;
       }
+
+      // Failed to allocate without a gc.
       if (GC_locker::is_active_and_needs_gc()) {
         // If this thread is not in a jni critical section, we stall
         // the requestor until the critical section has cleared and
@@ -460,7 +452,6 @@
     }
 
     if (result == NULL) {
-
       // Generate a VM operation
       VM_ParallelGCFailedAllocation op(size, gc_count);
       VMThread::execute(&op);
@@ -523,6 +514,42 @@
   return result;
 }
 
+// A "death march" is a series of ultra-slow allocations in which a full gc is
+// done before each allocation, and after the full gc the allocation still
+// cannot be satisfied from the young gen.  This routine detects that condition;
+// it should be called after a full gc has been done and the allocation
+// attempted from the young gen. The parameter 'addr' should be the result of
+// that young gen allocation attempt.
+void
+ParallelScavengeHeap::death_march_check(HeapWord* const addr, size_t size) {
+  if (addr != NULL) {
+    _death_march_count = 0;  // death march has ended
+  } else if (_death_march_count == 0) {
+    if (should_alloc_in_eden(size)) {
+      _death_march_count = 1;    // death march has started
+    }
+  }
+}
+
+HeapWord* ParallelScavengeHeap::mem_allocate_old_gen(size_t size) {
+  if (!should_alloc_in_eden(size) || GC_locker::is_active_and_needs_gc()) {
+    // Size is too big for eden, or gc is locked out.
+    return old_gen()->allocate(size);
+  }
+
+  // If a "death march" is in progress, allocate from the old gen a limited
+  // number of times before doing a GC.
+  if (_death_march_count > 0) {
+    if (_death_march_count < 64) {
+      ++_death_march_count;
+      return old_gen()->allocate(size);
+    } else {
+      _death_march_count = 0;
+    }
+  }
+  return NULL;
+}
+
 // Failed allocation policy. Must be called from the VM thread, and
 // only at a safepoint! Note that this method has policy for allocation
 // flow, and NOT collection policy. So we do not check for gc collection
@@ -535,27 +562,22 @@
   assert(!Universe::heap()->is_gc_active(), "not reentrant");
   assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock");
 
-  size_t mark_sweep_invocation_count = total_invocations();
-
-  // We assume (and assert!) that an allocation at this point will fail
-  // unless we collect.
+  // We assume that allocation in eden will fail unless we collect.
 
   // First level allocation failure, scavenge and allocate in young gen.
   GCCauseSetter gccs(this, GCCause::_allocation_failure);
-  PSScavenge::invoke();
+  const bool invoked_full_gc = PSScavenge::invoke();
   HeapWord* result = young_gen()->allocate(size);
 
   // Second level allocation failure.
   //   Mark sweep and allocate in young generation.
-  if (result == NULL) {
-    // There is some chance the scavenge method decided to invoke mark_sweep.
-    // Don't mark sweep twice if so.
-    if (mark_sweep_invocation_count == total_invocations()) {
-      invoke_full_gc(false);
-      result = young_gen()->allocate(size);
-    }
+  if (result == NULL && !invoked_full_gc) {
+    invoke_full_gc(false);
+    result = young_gen()->allocate(size);
   }
 
+  death_march_check(result, size);
+
   // Third level allocation failure.
   //   After mark sweep and young generation allocation failure,
   //   allocate in old generation.
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -64,6 +64,7 @@
   // Collection of generations that are adjacent in the
   // space reserved for the heap.
   AdjoiningGenerations* _gens;
+  unsigned int _death_march_count;
 
   static GCTaskManager*          _gc_task_manager;      // The task manager.
 
@@ -71,8 +72,13 @@
   static inline size_t total_invocations();
   HeapWord* allocate_new_tlab(size_t size);
 
+  inline bool should_alloc_in_eden(size_t size) const;
+  inline void death_march_check(HeapWord* const result, size_t size);
+  HeapWord* mem_allocate_old_gen(size_t size);
+
  public:
   ParallelScavengeHeap() : CollectedHeap() {
+    _death_march_count = 0;
     set_alignment(_perm_gen_alignment, intra_heap_alignment());
     set_alignment(_young_gen_alignment, intra_heap_alignment());
     set_alignment(_old_gen_alignment, intra_heap_alignment());
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,12 @@
     PSMarkSweep::total_invocations();
 }
 
+inline bool ParallelScavengeHeap::should_alloc_in_eden(const size_t size) const
+{
+  const size_t eden_size = young_gen()->eden_space()->capacity_in_words();
+  return size < eden_size / 2;
+}
+
 inline void ParallelScavengeHeap::invoke_scavenge()
 {
   PSScavenge::invoke();
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -100,12 +100,12 @@
 
 // This method contains no policy. You should probably
 // be calling invoke() instead.
-void PSMarkSweep::invoke_no_policy(bool clear_all_softrefs) {
+bool PSMarkSweep::invoke_no_policy(bool clear_all_softrefs) {
   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
   assert(ref_processor() != NULL, "Sanity");
 
   if (GC_locker::check_active_before_gc()) {
-    return;
+    return false;
   }
 
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
@@ -132,9 +132,7 @@
 
   AdaptiveSizePolicyOutput(size_policy, heap->total_collections());
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-  }
+  heap->print_heap_before_gc();
 
   // Fill in TLABs
   heap->accumulate_statistics_all_tlabs();
@@ -377,15 +375,15 @@
 
   NOT_PRODUCT(ref_processor()->verify_no_references_recorded());
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  heap->print_heap_after_gc();
 
   heap->post_full_gc_dump();
 
 #ifdef TRACESPINNING
   ParallelTaskTerminator::print_termination_counts();
 #endif
+
+  return true;
 }
 
 bool PSMarkSweep::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
@@ -504,7 +502,6 @@
 
 void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {
   // Recursively traverse all live objects and mark them
-  EventMark m("1 mark object");
   TraceTime tm("phase 1", PrintGCDetails && Verbose, true, gclog_or_tty);
   trace(" 1");
 
@@ -563,7 +560,6 @@
 
 
 void PSMarkSweep::mark_sweep_phase2() {
-  EventMark m("2 compute new addresses");
   TraceTime tm("phase 2", PrintGCDetails && Verbose, true, gclog_or_tty);
   trace("2");
 
@@ -608,7 +604,6 @@
 
 void PSMarkSweep::mark_sweep_phase3() {
   // Adjust the pointers to reflect the new locations
-  EventMark m("3 adjust pointers");
   TraceTime tm("phase 3", PrintGCDetails && Verbose, true, gclog_or_tty);
   trace("3");
 
@@ -672,15 +667,20 @@
 }
 
 jlong PSMarkSweep::millis_since_last_gc() {
-  jlong ret_val = os::javaTimeMillis() - _time_of_last_gc;
+  // We need a monotonically non-decreasing time in ms but
+  // os::javaTimeMillis() does not guarantee monotonicity.
+  jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
+  jlong ret_val = now - _time_of_last_gc;
   // XXX See note in genCollectedHeap::millis_since_last_gc().
   if (ret_val < 0) {
-    NOT_PRODUCT(warning("time warp: %d", ret_val);)
+    NOT_PRODUCT(warning("time warp: "INT64_FORMAT, ret_val);)
     return 0;
   }
   return ret_val;
 }
 
 void PSMarkSweep::reset_millis_since_last_gc() {
-  _time_of_last_gc = os::javaTimeMillis();
+  // We need a monotonically non-decreasing time in ms but
+  // os::javaTimeMillis() does not guarantee monotonicity.
+  _time_of_last_gc = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -78,7 +78,7 @@
 
  public:
   static void invoke(bool clear_all_softrefs);
-  static void invoke_no_policy(bool clear_all_softrefs);
+  static bool invoke_no_policy(bool clear_all_softrefs);
 
   static void initialize();
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -983,9 +983,7 @@
   // We need to track unique mark sweep invocations as well.
   _total_invocations++;
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-  }
+  heap->print_heap_before_gc();
 
   // Fill in TLABs
   heap->accumulate_statistics_all_tlabs();
@@ -1838,7 +1836,6 @@
 void PSParallelCompact::summary_phase(ParCompactionManager* cm,
                                       bool maximum_compaction)
 {
-  EventMark m("2 summarize");
   TraceTime tm("summary phase", print_phases(), true, gclog_or_tty);
   // trace("2");
 
@@ -1996,12 +1993,12 @@
 
 // This method contains no policy. You should probably
 // be calling invoke() instead.
-void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
+bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
   assert(ref_processor() != NULL, "Sanity");
 
   if (GC_locker::check_active_before_gc()) {
-    return;
+    return false;
   }
 
   TimeStamp marking_start;
@@ -2237,9 +2234,7 @@
 
   collection_exit.update();
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  heap->print_heap_after_gc();
   if (PrintGCTaskTimeStamps) {
     gclog_or_tty->print_cr("VM-Thread " INT64_FORMAT " " INT64_FORMAT " "
                            INT64_FORMAT,
@@ -2253,6 +2248,8 @@
 #ifdef TRACESPINNING
   ParallelTaskTerminator::print_termination_counts();
 #endif
+
+  return true;
 }
 
 bool PSParallelCompact::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
@@ -2352,7 +2349,6 @@
 void PSParallelCompact::marking_phase(ParCompactionManager* cm,
                                       bool maximum_heap_compaction) {
   // Recursively traverse all live objects and mark them
-  EventMark m("1 mark object");
   TraceTime tm("marking phase", print_phases(), true, gclog_or_tty);
 
   ParallelScavengeHeap* heap = gc_heap();
@@ -2438,7 +2434,6 @@
 
 void PSParallelCompact::adjust_roots() {
   // Adjust the pointers to reflect the new locations
-  EventMark m("3 adjust roots");
   TraceTime tm("adjust roots", print_phases(), true, gclog_or_tty);
 
   // General strong roots.
@@ -2469,7 +2464,6 @@
 }
 
 void PSParallelCompact::compact_perm(ParCompactionManager* cm) {
-  EventMark m("4 compact perm");
   TraceTime tm("compact perm gen", print_phases(), true, gclog_or_tty);
   // trace("4");
 
@@ -2647,7 +2641,6 @@
 }
 
 void PSParallelCompact::compact() {
-  EventMark m("5 compact");
   // trace("5");
   TraceTime tm("compaction phase", print_phases(), true, gclog_or_tty);
 
@@ -3398,17 +3391,22 @@
 }
 
 jlong PSParallelCompact::millis_since_last_gc() {
-  jlong ret_val = os::javaTimeMillis() - _time_of_last_gc;
+  // We need a monotonically non-decreasing time in ms but
+  // os::javaTimeMillis() does not guarantee monotonicity.
+  jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
+  jlong ret_val = now - _time_of_last_gc;
   // XXX See note in genCollectedHeap::millis_since_last_gc().
   if (ret_val < 0) {
-    NOT_PRODUCT(warning("time warp: %d", ret_val);)
+    NOT_PRODUCT(warning("time warp: "INT64_FORMAT, ret_val);)
     return 0;
   }
   return ret_val;
 }
 
 void PSParallelCompact::reset_millis_since_last_gc() {
-  _time_of_last_gc = os::javaTimeMillis();
+  // We need a monotonically non-decreasing time in ms but
+  // os::javaTimeMillis() does not guarantee monotonicity.
+  _time_of_last_gc = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
 }
 
 ParMarkBitMap::IterationStatus MoveAndUpdateClosure::copy_until_full()
@@ -3497,4 +3495,3 @@
   _updated_int_array_klass_obj = (klassOop)
     summary_data().calc_new_pointer(Universe::intArrayKlassObj());
 }
-
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1057,7 +1057,7 @@
   }
 
   static void invoke(bool maximum_heap_compaction);
-  static void invoke_no_policy(bool maximum_heap_compaction);
+  static bool invoke_no_policy(bool maximum_heap_compaction);
 
   static void post_initialize();
   // Perform initialization for PSParallelCompact that requires
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -247,167 +247,6 @@
   }
 }
 
-//
-// This method is pretty bulky. It would be nice to split it up
-// into smaller submethods, but we need to be careful not to hurt
-// performance.
-//
-
-oop PSPromotionManager::copy_to_survivor_space(oop o) {
-  assert(PSScavenge::should_scavenge(&o), "Sanity");
-
-  oop new_obj = NULL;
-
-  // NOTE! We must be very careful with any methods that access the mark
-  // in o. There may be multiple threads racing on it, and it may be forwarded
-  // at any time. Do not use oop methods for accessing the mark!
-  markOop test_mark = o->mark();
-
-  // The same test as "o->is_forwarded()"
-  if (!test_mark->is_marked()) {
-    bool new_obj_is_tenured = false;
-    size_t new_obj_size = o->size();
-
-    // Find the objects age, MT safe.
-    int age = (test_mark->has_displaced_mark_helper() /* o->has_displaced_mark() */) ?
-      test_mark->displaced_mark_helper()->age() : test_mark->age();
-
-    // Try allocating obj in to-space (unless too old)
-    if (age < PSScavenge::tenuring_threshold()) {
-      new_obj = (oop) _young_lab.allocate(new_obj_size);
-      if (new_obj == NULL && !_young_gen_is_full) {
-        // Do we allocate directly, or flush and refill?
-        if (new_obj_size > (YoungPLABSize / 2)) {
-          // Allocate this object directly
-          new_obj = (oop)young_space()->cas_allocate(new_obj_size);
-        } else {
-          // Flush and fill
-          _young_lab.flush();
-
-          HeapWord* lab_base = young_space()->cas_allocate(YoungPLABSize);
-          if (lab_base != NULL) {
-            _young_lab.initialize(MemRegion(lab_base, YoungPLABSize));
-            // Try the young lab allocation again.
-            new_obj = (oop) _young_lab.allocate(new_obj_size);
-          } else {
-            _young_gen_is_full = true;
-          }
-        }
-      }
-    }
-
-    // Otherwise try allocating obj tenured
-    if (new_obj == NULL) {
-#ifndef PRODUCT
-      if (Universe::heap()->promotion_should_fail()) {
-        return oop_promotion_failed(o, test_mark);
-      }
-#endif  // #ifndef PRODUCT
-
-      new_obj = (oop) _old_lab.allocate(new_obj_size);
-      new_obj_is_tenured = true;
-
-      if (new_obj == NULL) {
-        if (!_old_gen_is_full) {
-          // Do we allocate directly, or flush and refill?
-          if (new_obj_size > (OldPLABSize / 2)) {
-            // Allocate this object directly
-            new_obj = (oop)old_gen()->cas_allocate(new_obj_size);
-          } else {
-            // Flush and fill
-            _old_lab.flush();
-
-            HeapWord* lab_base = old_gen()->cas_allocate(OldPLABSize);
-            if(lab_base != NULL) {
-              _old_lab.initialize(MemRegion(lab_base, OldPLABSize));
-              // Try the old lab allocation again.
-              new_obj = (oop) _old_lab.allocate(new_obj_size);
-            }
-          }
-        }
-
-        // This is the promotion failed test, and code handling.
-        // The code belongs here for two reasons. It is slightly
-        // different thatn the code below, and cannot share the
-        // CAS testing code. Keeping the code here also minimizes
-        // the impact on the common case fast path code.
-
-        if (new_obj == NULL) {
-          _old_gen_is_full = true;
-          return oop_promotion_failed(o, test_mark);
-        }
-      }
-    }
-
-    assert(new_obj != NULL, "allocation should have succeeded");
-
-    // Copy obj
-    Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size);
-
-    // Now we have to CAS in the header.
-    if (o->cas_forward_to(new_obj, test_mark)) {
-      // We won any races, we "own" this object.
-      assert(new_obj == o->forwardee(), "Sanity");
-
-      // Increment age if obj still in new generation. Now that
-      // we're dealing with a markOop that cannot change, it is
-      // okay to use the non mt safe oop methods.
-      if (!new_obj_is_tenured) {
-        new_obj->incr_age();
-        assert(young_space()->contains(new_obj), "Attempt to push non-promoted obj");
-      }
-
-      // Do the size comparison first with new_obj_size, which we
-      // already have. Hopefully, only a few objects are larger than
-      // _min_array_size_for_chunking, and most of them will be arrays.
-      // So, the is->objArray() test would be very infrequent.
-      if (new_obj_size > _min_array_size_for_chunking &&
-          new_obj->is_objArray() &&
-          PSChunkLargeArrays) {
-        // we'll chunk it
-        oop* const masked_o = mask_chunked_array_oop(o);
-        push_depth(masked_o);
-        TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
-      } else {
-        // we'll just push its contents
-        new_obj->push_contents(this);
-      }
-    }  else {
-      // We lost, someone else "owns" this object
-      guarantee(o->is_forwarded(), "Object must be forwarded if the cas failed.");
-
-      // Try to deallocate the space.  If it was directly allocated we cannot
-      // deallocate it, so we have to test.  If the deallocation fails,
-      // overwrite with a filler object.
-      if (new_obj_is_tenured) {
-        if (!_old_lab.unallocate_object((HeapWord*) new_obj, new_obj_size)) {
-          CollectedHeap::fill_with_object((HeapWord*) new_obj, new_obj_size);
-        }
-      } else if (!_young_lab.unallocate_object((HeapWord*) new_obj, new_obj_size)) {
-        CollectedHeap::fill_with_object((HeapWord*) new_obj, new_obj_size);
-      }
-
-      // don't update this before the unallocation!
-      new_obj = o->forwardee();
-    }
-  } else {
-    assert(o->is_forwarded(), "Sanity");
-    new_obj = o->forwardee();
-  }
-
-#ifdef DEBUG
-  // This code must come after the CAS test, or it will print incorrect
-  // information.
-  if (TraceScavenge) {
-    gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (" SIZE_FORMAT ")}",
-       PSScavenge::should_scavenge(&new_obj) ? "copying" : "tenuring",
-       new_obj->blueprint()->internal_name(), o, new_obj, new_obj->size());
-  }
-#endif
-
-  return new_obj;
-}
-
 template <class T> void PSPromotionManager::process_array_chunk_work(
                                                  oop obj,
                                                  int start, int end) {
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -171,7 +171,7 @@
   void set_old_gen_is_full(bool state) { _old_gen_is_full = state; }
 
   // Promotion methods
-  oop copy_to_survivor_space(oop o);
+  template<bool promote_immediately> oop copy_to_survivor_space(oop o);
   oop oop_promotion_failed(oop obj, markOop obj_mark);
 
   void reset();
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,6 +61,170 @@
   claim_or_forward_internal_depth(p);
 }
 
+//
+// This method is pretty bulky. It would be nice to split it up
+// into smaller submethods, but we need to be careful not to hurt
+// performance.
+//
+template<bool promote_immediately>
+oop PSPromotionManager::copy_to_survivor_space(oop o) {
+  assert(PSScavenge::should_scavenge(&o), "Sanity");
+
+  oop new_obj = NULL;
+
+  // NOTE! We must be very careful with any methods that access the mark
+  // in o. There may be multiple threads racing on it, and it may be forwarded
+  // at any time. Do not use oop methods for accessing the mark!
+  markOop test_mark = o->mark();
+
+  // The same test as "o->is_forwarded()"
+  if (!test_mark->is_marked()) {
+    bool new_obj_is_tenured = false;
+    size_t new_obj_size = o->size();
+
+    if (!promote_immediately) {
+      // Find the object's age, MT safe.
+      int age = (test_mark->has_displaced_mark_helper() /* o->has_displaced_mark() */) ?
+        test_mark->displaced_mark_helper()->age() : test_mark->age();
+
+      // Try allocating obj in to-space (unless too old)
+      if (age < PSScavenge::tenuring_threshold()) {
+        new_obj = (oop) _young_lab.allocate(new_obj_size);
+        if (new_obj == NULL && !_young_gen_is_full) {
+          // Do we allocate directly, or flush and refill?
+          if (new_obj_size > (YoungPLABSize / 2)) {
+            // Allocate this object directly
+            new_obj = (oop)young_space()->cas_allocate(new_obj_size);
+          } else {
+            // Flush and fill
+            _young_lab.flush();
+
+            HeapWord* lab_base = young_space()->cas_allocate(YoungPLABSize);
+            if (lab_base != NULL) {
+              _young_lab.initialize(MemRegion(lab_base, YoungPLABSize));
+              // Try the young lab allocation again.
+              new_obj = (oop) _young_lab.allocate(new_obj_size);
+            } else {
+              _young_gen_is_full = true;
+            }
+          }
+        }
+      }
+    }
+
+    // Otherwise try allocating obj tenured
+    if (new_obj == NULL) {
+#ifndef PRODUCT
+      if (Universe::heap()->promotion_should_fail()) {
+        return oop_promotion_failed(o, test_mark);
+      }
+#endif  // #ifndef PRODUCT
+
+      new_obj = (oop) _old_lab.allocate(new_obj_size);
+      new_obj_is_tenured = true;
+
+      if (new_obj == NULL) {
+        if (!_old_gen_is_full) {
+          // Do we allocate directly, or flush and refill?
+          if (new_obj_size > (OldPLABSize / 2)) {
+            // Allocate this object directly
+            new_obj = (oop)old_gen()->cas_allocate(new_obj_size);
+          } else {
+            // Flush and fill
+            _old_lab.flush();
+
+            HeapWord* lab_base = old_gen()->cas_allocate(OldPLABSize);
+            if(lab_base != NULL) {
+              _old_lab.initialize(MemRegion(lab_base, OldPLABSize));
+              // Try the old lab allocation again.
+              new_obj = (oop) _old_lab.allocate(new_obj_size);
+            }
+          }
+        }
+
+        // This is the promotion failed test, and code handling.
+        // The code belongs here for two reasons. It is slightly
+        // different than the code below, and cannot share the
+        // CAS testing code. Keeping the code here also minimizes
+        // the impact on the common case fast path code.
+
+        if (new_obj == NULL) {
+          _old_gen_is_full = true;
+          return oop_promotion_failed(o, test_mark);
+        }
+      }
+    }
+
+    assert(new_obj != NULL, "allocation should have succeeded");
+
+    // Copy obj
+    Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size);
+
+    // Now we have to CAS in the header.
+    if (o->cas_forward_to(new_obj, test_mark)) {
+      // We won any races, we "own" this object.
+      assert(new_obj == o->forwardee(), "Sanity");
+
+      // Increment age if obj still in new generation. Now that
+      // we're dealing with a markOop that cannot change, it is
+      // okay to use the non mt safe oop methods.
+      if (!new_obj_is_tenured) {
+        new_obj->incr_age();
+        assert(young_space()->contains(new_obj), "Attempt to push non-promoted obj");
+      }
+
+      // Do the size comparison first with new_obj_size, which we
+      // already have. Hopefully, only a few objects are larger than
+      // _min_array_size_for_chunking, and most of them will be arrays.
+      // So, the is_objArray() test would be very infrequent.
+      if (new_obj_size > _min_array_size_for_chunking &&
+          new_obj->is_objArray() &&
+          PSChunkLargeArrays) {
+        // we'll chunk it
+        oop* const masked_o = mask_chunked_array_oop(o);
+        push_depth(masked_o);
+        TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
+      } else {
+        // we'll just push its contents
+        new_obj->push_contents(this);
+      }
+    }  else {
+      // We lost, someone else "owns" this object
+      guarantee(o->is_forwarded(), "Object must be forwarded if the cas failed.");
+
+      // Try to deallocate the space.  If it was directly allocated we cannot
+      // deallocate it, so we have to test.  If the deallocation fails,
+      // overwrite with a filler object.
+      if (new_obj_is_tenured) {
+        if (!_old_lab.unallocate_object((HeapWord*) new_obj, new_obj_size)) {
+          CollectedHeap::fill_with_object((HeapWord*) new_obj, new_obj_size);
+        }
+      } else if (!_young_lab.unallocate_object((HeapWord*) new_obj, new_obj_size)) {
+        CollectedHeap::fill_with_object((HeapWord*) new_obj, new_obj_size);
+      }
+
+      // don't update this before the unallocation!
+      new_obj = o->forwardee();
+    }
+  } else {
+    assert(o->is_forwarded(), "Sanity");
+    new_obj = o->forwardee();
+  }
+
+#ifdef DEBUG
+  // This code must come after the CAS test, or it will print incorrect
+  // information.
+  if (TraceScavenge) {
+    gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (" SIZE_FORMAT ")}",
+       PSScavenge::should_scavenge(&new_obj) ? "copying" : "tenuring",
+       new_obj->blueprint()->internal_name(), o, new_obj, new_obj->size());
+  }
+#endif
+
+  return new_obj;
+}
+
+
 inline void PSPromotionManager::process_popped_location_depth(StarTask p) {
   if (is_oop_masked(p)) {
     assert(PSChunkLargeArrays, "invariant");
@@ -69,9 +233,9 @@
   } else {
     if (p.is_narrow()) {
       assert(UseCompressedOops, "Error");
-      PSScavenge::copy_and_push_safe_barrier(this, (narrowOop*)p);
+      PSScavenge::copy_and_push_safe_barrier<narrowOop, /*promote_immediately=*/false>(this, p);
     } else {
-      PSScavenge::copy_and_push_safe_barrier(this, (oop*)p);
+      PSScavenge::copy_and_push_safe_barrier<oop, /*promote_immediately=*/false>(this, p);
     }
   }
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "classfile/symbolTable.hpp"
+#include "code/codeCache.hpp"
 #include "gc_implementation/parallelScavenge/cardTableExtension.hpp"
 #include "gc_implementation/parallelScavenge/gcTaskManager.hpp"
 #include "gc_implementation/parallelScavenge/generationSizer.hpp"
@@ -100,7 +101,7 @@
 
     // Weak refs may be visited more than once.
     if (PSScavenge::should_scavenge(p, _to_space)) {
-      PSScavenge::copy_and_push_safe_barrier(_promotion_manager, p);
+      PSScavenge::copy_and_push_safe_barrier<T, /*promote_immediately=*/false>(_promotion_manager, p);
     }
   }
   virtual void do_oop(oop* p)       { PSKeepAliveClosure::do_oop_work(p); }
@@ -214,36 +215,41 @@
 //
 // Note that this method should only be called from the vm_thread while
 // at a safepoint!
-void PSScavenge::invoke() {
+bool PSScavenge::invoke() {
   assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
   assert(Thread::current() == (Thread*)VMThread::vm_thread(), "should be in vm thread");
   assert(!Universe::heap()->is_gc_active(), "not reentrant");
 
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
+  ParallelScavengeHeap* const heap = (ParallelScavengeHeap*)Universe::heap();
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
 
   PSAdaptiveSizePolicy* policy = heap->size_policy();
   IsGCActiveMark mark;
 
-  bool scavenge_was_done = PSScavenge::invoke_no_policy();
+  const bool scavenge_done = PSScavenge::invoke_no_policy();
+  const bool need_full_gc = !scavenge_done ||
+    policy->should_full_GC(heap->old_gen()->free_in_bytes());
+  bool full_gc_done = false;
 
-  PSGCAdaptivePolicyCounters* counters = heap->gc_policy_counters();
-  if (UsePerfData)
-    counters->update_full_follows_scavenge(0);
-  if (!scavenge_was_done ||
-      policy->should_full_GC(heap->old_gen()->free_in_bytes())) {
-    if (UsePerfData)
-      counters->update_full_follows_scavenge(full_follows_scavenge);
+  if (UsePerfData) {
+    PSGCAdaptivePolicyCounters* const counters = heap->gc_policy_counters();
+    const int ffs_val = need_full_gc ? full_follows_scavenge : not_skipped;
+    counters->update_full_follows_scavenge(ffs_val);
+  }
+
+  if (need_full_gc) {
     GCCauseSetter gccs(heap, GCCause::_adaptive_size_policy);
     CollectorPolicy* cp = heap->collector_policy();
     const bool clear_all_softrefs = cp->should_clear_all_soft_refs();
 
     if (UseParallelOldGC) {
-      PSParallelCompact::invoke_no_policy(clear_all_softrefs);
+      full_gc_done = PSParallelCompact::invoke_no_policy(clear_all_softrefs);
     } else {
-      PSMarkSweep::invoke_no_policy(clear_all_softrefs);
+      full_gc_done = PSMarkSweep::invoke_no_policy(clear_all_softrefs);
     }
   }
+
+  return full_gc_done;
 }
 
 // This method contains no policy. You should probably
@@ -295,9 +301,7 @@
     heap->record_gen_tops_before_GC();
   }
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-  }
+  heap->print_heap_before_gc();
 
   assert(!NeverTenure || _tenuring_threshold == markOopDesc::max_age + 1, "Sanity");
   assert(!AlwaysTenure || _tenuring_threshold == 0, "Sanity");
@@ -604,6 +608,8 @@
 
     NOT_PRODUCT(reference_processor()->verify_no_references_recorded());
 
+    CodeCache::prune_scavenge_root_nmethods();
+
     // Re-verify object start arrays
     if (VerifyObjectStartArray &&
         VerifyAfterGC) {
@@ -643,9 +649,7 @@
     Universe::verify(false);
   }
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  heap->print_heap_after_gc();
 
   if (ZapUnusedHeapArea) {
     young_gen->eden_space()->check_mangled_unused_area_complete();
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -117,10 +117,9 @@
   // Called by parallelScavengeHeap to init the tenuring threshold
   static void initialize();
 
-  // Scavenge entry point
-  static void invoke();
-  // Return true is a collection was done.  Return
-  // false if the collection was skipped.
+  // Scavenge entry point.  This may invoke a full gc; return true if so.
+  static bool invoke();
+  // Return true if a collection was done; false otherwise.
   static bool invoke_no_policy();
 
   // If an attempt to promote fails, this method is invoked
@@ -135,7 +134,8 @@
   template <class T> static inline bool should_scavenge(T* p, MutableSpace* to_space);
   template <class T> static inline bool should_scavenge(T* p, bool check_to_space);
 
-  template <class T> inline static void copy_and_push_safe_barrier(PSPromotionManager* pm, T* p);
+  template <class T, bool promote_immediately>
+    inline static void copy_and_push_safe_barrier(PSPromotionManager* pm, T* p);
 
   // Is an object in the young generation
   // This assumes that the HeapWord argument is in the heap,
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #include "gc_implementation/parallelScavenge/cardTableExtension.hpp"
 #include "gc_implementation/parallelScavenge/parallelScavengeHeap.hpp"
 #include "gc_implementation/parallelScavenge/psPromotionManager.hpp"
+#include "gc_implementation/parallelScavenge/psPromotionManager.inline.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
 
 inline void PSScavenge::save_to_space_top_before_gc() {
@@ -65,7 +66,7 @@
 // Attempt to "claim" oop at p via CAS, push the new obj if successful
 // This version tests the oop* to make sure it is within the heap before
 // attempting marking.
-template <class T>
+template <class T, bool promote_immediately>
 inline void PSScavenge::copy_and_push_safe_barrier(PSPromotionManager* pm,
                                                    T*                  p) {
   assert(should_scavenge(p, true), "revisiting object?");
@@ -73,7 +74,7 @@
   oop o = oopDesc::load_decode_heap_oop_not_null(p);
   oop new_obj = o->is_forwarded()
         ? o->forwardee()
-        : pm->copy_to_survivor_space(o);
+        : pm->copy_to_survivor_space<promote_immediately>(o);
   oopDesc::encode_store_heap_oop_not_null(p, new_obj);
 
   // We cannot mark without test, as some code passes us pointers
@@ -86,7 +87,8 @@
   }
 }
 
-class PSScavengeRootsClosure: public OopClosure {
+template<bool promote_immediately>
+class PSRootsClosure: public OopClosure {
  private:
   PSPromotionManager* _promotion_manager;
 
@@ -94,13 +96,16 @@
   template <class T> void do_oop_work(T *p) {
     if (PSScavenge::should_scavenge(p)) {
       // We never card mark roots, maybe call a func without test?
-      PSScavenge::copy_and_push_safe_barrier(_promotion_manager, p);
+      PSScavenge::copy_and_push_safe_barrier<T, promote_immediately>(_promotion_manager, p);
     }
   }
  public:
-  PSScavengeRootsClosure(PSPromotionManager* pm) : _promotion_manager(pm) { }
-  void do_oop(oop* p)       { PSScavengeRootsClosure::do_oop_work(p); }
-  void do_oop(narrowOop* p) { PSScavengeRootsClosure::do_oop_work(p); }
+  PSRootsClosure(PSPromotionManager* pm) : _promotion_manager(pm) { }
+  void do_oop(oop* p)       { PSRootsClosure::do_oop_work(p); }
+  void do_oop(narrowOop* p) { PSRootsClosure::do_oop_work(p); }
 };
 
+typedef PSRootsClosure</*promote_immediately=*/false> PSScavengeRootsClosure;
+typedef PSRootsClosure</*promote_immediately=*/true> PSPromoteRootsClosure;
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSSCAVENGE_INLINE_HPP
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,7 @@
 
   PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(which);
   PSScavengeRootsClosure roots_closure(pm);
+  PSPromoteRootsClosure  roots_to_old_closure(pm);
 
   switch (_root_type) {
     case universe:
@@ -91,7 +92,7 @@
 
     case code_cache:
       {
-        CodeBlobToOopClosure each_scavengable_code_blob(&roots_closure, /*do_marking=*/ true);
+        CodeBlobToOopClosure each_scavengable_code_blob(&roots_to_old_closure, /*do_marking=*/ true);
         CodeCache::scavenge_root_nmethods_do(&each_scavengable_code_blob);
       }
       break;
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -282,7 +282,7 @@
     // large page can be broken down if we require small pages.
     os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
     // Then we uncommit the pages in the range.
-    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
+    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
     // And make them local/first-touch biased.
     os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size(), lgrp_id);
   }
@@ -297,7 +297,7 @@
     assert((intptr_t)aligned_region.start()     % page_size() == 0 &&
            (intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
     assert(region().contains(aligned_region), "Sanity");
-    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
+    os::free_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
   }
 }
 
@@ -954,7 +954,7 @@
     if (e != scan_end) {
       if ((page_expected.size != page_size || page_expected.lgrp_id != lgrp_id())
           && page_expected.size != 0) {
-        os::free_memory(s, pointer_delta(e, s, sizeof(char)));
+        os::free_memory(s, pointer_delta(e, s, sizeof(char)), page_size);
       }
       page_expected = page_found;
     }
--- a/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -51,7 +51,7 @@
       size_t size = pointer_delta(end, start, sizeof(char));
       if (clear_space) {
         // Prefer page reallocation to migration.
-        os::free_memory((char*)start, size);
+        os::free_memory((char*)start, size, page_size);
       }
       os::numa_make_global((char*)start, size);
     }
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,31 @@
 
 size_t CollectedHeap::_filler_array_max_size = 0;
 
+template <>
+void EventLogBase<GCMessage>::print(outputStream* st, GCMessage& m) {
+  st->print_cr("GC heap %s", m.is_before ? "before" : "after");
+  st->print_raw(m);
+}
+
+void GCHeapLog::log_heap(bool before) {
+  if (!should_log()) {
+    return;
+  }
+
+  double timestamp = fetch_timestamp();
+  MutexLockerEx ml(&_mutex, Mutex::_no_safepoint_check_flag);
+  int index = compute_log_index();
+  _records[index].thread = NULL; // It's the GC thread so it's not that interesting.
+  _records[index].timestamp = timestamp;
+  _records[index].data.is_before = before;
+  stringStream st(_records[index].data.buffer(), _records[index].data.size());
+  if (before) {
+    Universe::print_heap_before_gc(&st, true);
+  } else {
+    Universe::print_heap_after_gc(&st, true);
+  }
+}
+
 // Memory state functions.
 
 
@@ -81,6 +106,12 @@
                              80, GCCause::to_string(_gc_lastcause), CHECK);
   }
   _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below.
+  // Create the ring log
+  if (LogEvents) {
+    _gc_heap_log = new GCHeapLog();
+  } else {
+    _gc_heap_log = NULL;
+  }
 }
 
 void CollectedHeap::pre_initialize() {
@@ -471,3 +502,30 @@
 
   return mirror;
 }
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+void CollectedHeap::test_is_in() {
+  CollectedHeap* heap = Universe::heap();
+
+  uintptr_t epsilon    = (uintptr_t) MinObjAlignment;
+  uintptr_t heap_start = (uintptr_t) heap->_reserved.start();
+  uintptr_t heap_end   = (uintptr_t) heap->_reserved.end();
+
+  // Test that NULL is not in the heap.
+  assert(!heap->is_in(NULL), "NULL is unexpectedly in the heap");
+
+  // Test that a pointer to before the heap start is reported as outside the heap.
+  assert(heap_start >= ((uintptr_t)NULL + epsilon), "sanity");
+  void* before_heap = (void*)(heap_start - epsilon);
+  assert(!heap->is_in(before_heap),
+      err_msg("before_heap: " PTR_FORMAT " is unexpectedly in the heap", before_heap));
+
+  // Test that a pointer to after the heap end is reported as outside the heap.
+  assert(heap_end <= ((uintptr_t)-1 - epsilon), "sanity");
+  void* after_heap = (void*)(heap_end + epsilon);
+  assert(!heap->is_in(after_heap),
+      err_msg("after_heap: " PTR_FORMAT " is unexpectedly in the heap", after_heap));
+}
+#endif
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 #include "runtime/handles.hpp"
 #include "runtime/perfData.hpp"
 #include "runtime/safepoint.hpp"
+#include "utilities/events.hpp"
 
 // A "CollectedHeap" is an implementation of a java heap for HotSpot.  This
 // is an abstract class: there may be many different kinds of heaps.  This
@@ -43,6 +44,29 @@
 class Thread;
 class CollectorPolicy;
 
+class GCMessage : public FormatBuffer<1024> {
+ public:
+  bool is_before;
+
+ public:
+  GCMessage() {}
+};
+
+class GCHeapLog : public EventLogBase<GCMessage> {
+ private:
+  void log_heap(bool before);
+
+ public:
+  GCHeapLog() : EventLogBase<GCMessage>("GC Heap History") {}
+
+  void log_heap_before() {
+    log_heap(true);
+  }
+  void log_heap_after() {
+    log_heap(false);
+  }
+};
+
 //
 // CollectedHeap
 //   SharedHeap
@@ -62,6 +86,8 @@
   // Used for filler objects (static, but initialized in ctor).
   static size_t _filler_array_max_size;
 
+  GCHeapLog* _gc_heap_log;
+
   // Used in support of ReduceInitialCardMarks; only consulted if COMPILER2 is being used
   bool _defer_initial_card_mark;
 
@@ -69,7 +95,7 @@
   MemRegion _reserved;
   BarrierSet* _barrier_set;
   bool _is_gc_active;
-  int _n_par_threads;
+  uint _n_par_threads;
 
   unsigned int _total_collections;          // ... started
   unsigned int _total_full_collections;     // ... started
@@ -217,8 +243,8 @@
     return p == NULL || is_in_reserved(p);
   }
 
-  // Returns "TRUE" if "p" points to the head of an allocated object in the
-  // heap. Since this method can be expensive in general, we restrict its
+  // Returns "TRUE" iff "p" points into the committed areas of the heap.
+  // Since this method can be expensive in general, we restrict its
   // use to assertion checking only.
   virtual bool is_in(const void* p) const = 0;
 
@@ -309,10 +335,10 @@
   GCCause::Cause gc_cause() { return _gc_cause; }
 
   // Number of threads currently working on GC tasks.
-  int n_par_threads() { return _n_par_threads; }
+  uint n_par_threads() { return _n_par_threads; }
 
   // May be overridden to set additional parallelism.
-  virtual void set_par_threads(int t) { _n_par_threads = t; };
+  virtual void set_par_threads(uint t) { _n_par_threads = t; };
 
   // Preload classes into the shared portion of the heap, and then dump
   // that data to a file so that it can be loaded directly by another
@@ -618,6 +644,27 @@
   // Default implementation does nothing.
   virtual void print_tracing_info() const = 0;
 
+  // If PrintHeapAtGC is set, call the appropriate print routine; also log to _gc_heap_log if it exists.
+  void print_heap_before_gc() {
+    if (PrintHeapAtGC) {
+      Universe::print_heap_before_gc();
+    }
+    if (_gc_heap_log != NULL) {
+      _gc_heap_log->log_heap_before();
+    }
+  }
+  void print_heap_after_gc() {
+    if (PrintHeapAtGC) {
+      Universe::print_heap_after_gc();
+    }
+    if (_gc_heap_log != NULL) {
+      _gc_heap_log->log_heap_after();
+    }
+  }
+
+  // Allocate GCHeapLog during VM startup
+  static void initialize_heap_log();
+
   // Heap verification
   virtual void verify(bool allow_dirty, bool silent, VerifyOption option) = 0;
 
@@ -648,6 +695,10 @@
   // reduce the occurrence of ParallelGCThreads to uses where the
   // actual number may be germane.
   static bool use_parallel_gc_threads() { return ParallelGCThreads > 0; }
+
+  /////////////// Unit tests ///////////////
+
+  NOT_PRODUCT(static void test_is_in();)
 };
 
 // Class to set and reset the GC cause for a CollectedHeap.
--- a/src/share/vm/gc_interface/gcCause.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_interface/gcCause.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -84,6 +84,9 @@
     case _g1_inc_collection_pause:
       return "G1 Evacuation Pause";
 
+    case _g1_humongous_allocation:
+      return "G1 Humongous Allocation";
+
     case _last_ditch_collection:
       return "Last ditch collection";
 
--- a/src/share/vm/gc_interface/gcCause.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/gc_interface/gcCause.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,6 +66,7 @@
     _adaptive_size_policy,
 
     _g1_inc_collection_pause,
+    _g1_humongous_allocation,
 
     _last_ditch_collection,
     _last_gc_cause
--- a/src/share/vm/graal/graalCompilerToVM.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/graal/graalCompilerToVM.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -763,16 +763,16 @@
   set_int(env, config, "stackShadowPages", StackShadowPages);
   set_int(env, config, "hubOffset", oopDesc::klass_offset_in_bytes());
   set_int(env, config, "arrayLengthOffset", arrayOopDesc::length_offset_in_bytes());
-  set_int(env, config, "klassStateOffset", instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
+  set_int(env, config, "klassStateOffset", in_bytes(instanceKlass::init_state_offset()));
   set_int(env, config, "klassStateFullyInitialized", (int)instanceKlass::fully_initialized);
   set_int(env, config, "threadTlabTopOffset", in_bytes(JavaThread::tlab_top_offset()));
   set_int(env, config, "threadTlabEndOffset", in_bytes(JavaThread::tlab_end_offset()));
   set_int(env, config, "threadObjectOffset", in_bytes(JavaThread::threadObj_offset()));
-  set_int(env, config, "instanceHeaderPrototypeOffset", Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes());
+  set_int(env, config, "instanceHeaderPrototypeOffset", in_bytes(Klass::prototype_header_offset()));
   set_int(env, config, "threadExceptionOopOffset", in_bytes(JavaThread::exception_oop_offset()));
   set_int(env, config, "threadExceptionPcOffset", in_bytes(JavaThread::exception_pc_offset()));
   set_int(env, config, "threadMultiNewArrayStorage", in_bytes(JavaThread::graal_multinewarray_storage_offset()));
-  set_int(env, config, "classMirrorOffset", klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes());
+  set_int(env, config, "classMirrorOffset", in_bytes(Klass::java_mirror_offset()));
   
   set_int(env, config, "methodDataOopDataOffset", in_bytes(methodDataOopDesc::data_offset()));
   set_int(env, config, "dataLayoutHeaderSize", DataLayout::header_size_in_bytes());
@@ -805,8 +805,8 @@
   set_long(env, config, "fastMonitorExitStub", VmIds::addStub(Runtime1::entry_for(Runtime1::graal_monitorexit_id)));
   set_long(env, config, "safepointPollingAddress", (jlong)(os::get_polling_page() + (SafepointPollOffset % os::vm_page_size())));
   set_int(env, config, "runtimeCallStackSize", (jint)frame::arg_reg_save_area_bytes);
-  set_int(env, config, "klassModifierFlagsOffset", Klass::modifier_flags_offset_in_bytes() + sizeof(oopDesc));
-  set_int(env, config, "graalMirrorKlassOffset", klassOopDesc::klass_part_offset_in_bytes() + Klass::graal_mirror_offset_in_bytes());
+  set_int(env, config, "klassModifierFlagsOffset", in_bytes(Klass::modifier_flags_offset()));
+  set_int(env, config, "graalMirrorKlassOffset", in_bytes(Klass::graal_mirror_offset()));
   set_int(env, config, "klassOopOffset", java_lang_Class::klass_offset_in_bytes());
 
   set_boolean(env, config, "isPollingPageFar", Assembler::is_polling_page_far());
@@ -844,7 +844,7 @@
     env->SetIntArrayRegion(arrayOffsets, i, 1, &offset);
   }
   set_int_array(env, config, "arrayOffsets", arrayOffsets);
-  set_int(env, config, "arrayClassElementOffset", objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc));
+  set_int(env, config, "arrayClassElementOffset", in_bytes(objArrayKlass::element_klass_offset()));
   return config;
 }
 
--- a/src/share/vm/interpreter/interpreterRuntime.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/interpreter/interpreterRuntime.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -445,6 +445,7 @@
     }
   } while (should_repeat == true);
 
+#ifdef GRAAL
   if (h_method->method_data() != NULL) {
     ResourceMark rm(thread);
     ProfileData* pdata = h_method->method_data()->allocate_bci_to_data(current_bci);
@@ -456,6 +457,7 @@
       }
     }
   }
+#endif
 
   // notify JVMTI of an exception throw; JVMTI will detect if this is a first
   // time throw or a stack unwinding throw and accordingly notify the debugger
@@ -871,7 +873,9 @@
   const int branch_bci = branch_bcp != NULL ? method->bci_from(branch_bcp) : InvocationEntryBci;
   const int bci = branch_bcp != NULL ? method->bci_from(fr.interpreter_frame_bcp()) : InvocationEntryBci;
 
+  assert(!HAS_PENDING_EXCEPTION, "Should not have any exceptions pending");
   nmethod* osr_nm = CompilationPolicy::policy()->event(method, method, branch_bci, bci, CompLevel_none, NULL, thread);
+  assert(!HAS_PENDING_EXCEPTION, "Event handler should not throw any exceptions");
 
   if (osr_nm != NULL) {
     // We may need to do on-stack replacement which requires that no
--- a/src/share/vm/interpreter/templateTable.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/interpreter/templateTable.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -429,8 +429,11 @@
   def(Bytecodes::_jsr                 , ubcp|disp|____|____, vtos, vtos, jsr                 ,  _           ); // result is not an oop, so do not transition to atos
   def(Bytecodes::_ret                 , ubcp|disp|____|____, vtos, vtos, ret                 ,  _           );
   def(Bytecodes::_tableswitch         , ubcp|disp|____|____, itos, vtos, tableswitch         ,  _           );
-//  def(Bytecodes::_lookupswitch        , ubcp|disp|____|____, itos, itos, lookupswitch        ,  _           );
+#ifdef GRAAL
   def(Bytecodes::_lookupswitch        , ubcp|disp|____|____, itos, vtos, fast_linearswitch   ,  _           );
+#else
+  def(Bytecodes::_lookupswitch        , ubcp|disp|____|____, itos, itos, lookupswitch        ,  _           );
+#endif
   def(Bytecodes::_ireturn             , ____|disp|clvm|____, itos, itos, _return             , itos         );
   def(Bytecodes::_lreturn             , ____|disp|clvm|____, ltos, ltos, _return             , ltos         );
   def(Bytecodes::_freturn             , ____|disp|clvm|____, ftos, ftos, _return             , ftos         );
--- a/src/share/vm/memory/compactingPermGenGen.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/compactingPermGenGen.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -240,9 +240,6 @@
     if (_ro_space == NULL || _rw_space == NULL)
       vm_exit_during_initialization("Could not allocate a shared space");
 
-    // Cover both shared spaces entirely with cards.
-    _rs->resize_covered_region(MemRegion(readonly_bottom, readwrite_end));
-
     if (UseSharedSpaces) {
 
       // Map in the regions in the shared file.
@@ -279,10 +276,14 @@
         delete _rw_space;
         _rw_space = NULL;
         shared_end = (HeapWord*)(rs.base() + rs.size());
-        _rs->resize_covered_region(MemRegion(shared_bottom, shared_bottom));
       }
     }
 
+    if (spec()->enable_shared_spaces()) {
+      // Cover both shared spaces entirely with cards.
+      _rs->resize_covered_region(MemRegion(readonly_bottom, readwrite_end));
+    }
+
     // Reserved region includes shared spaces for oop.is_in_reserved().
     _reserved.set_end(shared_end);
 
--- a/src/share/vm/memory/defNewGeneration.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/defNewGeneration.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -655,7 +655,12 @@
   from()->set_concurrent_iteration_safe_limit(from()->top());
   to()->set_concurrent_iteration_safe_limit(to()->top());
   SpecializationStats::print();
-  update_time_of_last_gc(os::javaTimeMillis());
+
+  // We need to use a monotonically non-decreasing time in ms
+  // or we will see time-warp warnings and os::javaTimeMillis()
+  // does not guarantee monotonicity.
+  jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
+  update_time_of_last_gc(now);
 }
 
 class RemoveForwardPointerClosure: public ObjectClosure {
--- a/src/share/vm/memory/dump.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/dump.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1402,7 +1402,7 @@
         instanceKlass* ik = (instanceKlass*) k;
         // Link the class to cause the bytecodes to be rewritten and the
         // cpcache to be created.
-        if (ik->get_init_state() < instanceKlass::linked) {
+        if (ik->init_state() < instanceKlass::linked) {
           ik->link_class(THREAD);
           guarantee(!HAS_PENDING_EXCEPTION, "exception in class rewriting");
         }
@@ -1535,7 +1535,7 @@
         // are loaded in order that the related data structures (klass,
         // cpCache, Sting constants) are located together.
 
-        if (ik->get_init_state() < instanceKlass::linked) {
+        if (ik->init_state() < instanceKlass::linked) {
           ik->link_class(THREAD);
           guarantee(!(HAS_PENDING_EXCEPTION), "exception in class rewriting");
         }
--- a/src/share/vm/memory/gcLocker.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/gcLocker.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,37 +32,90 @@
 volatile bool GC_locker::_needs_gc       = false;
 volatile bool GC_locker::_doing_gc       = false;
 
+#ifdef ASSERT
+volatile jint GC_locker::_debug_jni_lock_count = 0;
+#endif
+
+
+#ifdef ASSERT
+void GC_locker::verify_critical_count() {
+  if (SafepointSynchronize::is_at_safepoint()) {
+    assert(!needs_gc() || _debug_jni_lock_count == _jni_lock_count, "must agree");
+    int count = 0;
+    // Count the number of threads with critical operations in progress
+    for (JavaThread* thr = Threads::first(); thr; thr = thr->next()) {
+      if (thr->in_critical()) {
+        count++;
+      }
+    }
+    if (_jni_lock_count != count) {
+      tty->print_cr("critical counts don't match: %d != %d", _jni_lock_count, count);
+      for (JavaThread* thr = Threads::first(); thr; thr = thr->next()) {
+        if (thr->in_critical()) {
+          tty->print_cr(INTPTR_FORMAT " in_critical %d", thr, thr->in_critical());
+        }
+      }
+    }
+    assert(_jni_lock_count == count, "must be equal");
+  }
+}
+#endif
+
+bool GC_locker::check_active_before_gc() {
+  assert(SafepointSynchronize::is_at_safepoint(), "only read at safepoint");
+  if (is_active() && !_needs_gc) {
+    verify_critical_count();
+    _needs_gc = true;
+    if (PrintJNIGCStalls && PrintGCDetails) {
+      ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
+      gclog_or_tty->print_cr("%.3f: Setting _needs_gc. Thread \"%s\" %d locked.",
+                             gclog_or_tty->time_stamp().seconds(), Thread::current()->name(), _jni_lock_count);
+    }
+
+  }
+  return is_active();
+}
+
 void GC_locker::stall_until_clear() {
   assert(!JavaThread::current()->in_critical(), "Would deadlock");
-  if (PrintJNIGCStalls && PrintGCDetails) {
-    ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
-    gclog_or_tty->print_cr(
-      "Allocation failed. Thread \"%s\" is stalled by JNI critical section.",
-      JavaThread::current()->name());
+  MutexLocker   ml(JNICritical_lock);
+
+  if (needs_gc()) {
+    if (PrintJNIGCStalls && PrintGCDetails) {
+      ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
+      gclog_or_tty->print_cr("%.3f: Allocation failed. Thread \"%s\" is stalled by JNI critical section, %d locked.",
+                             gclog_or_tty->time_stamp().seconds(), Thread::current()->name(), _jni_lock_count);
+    }
   }
-  MutexLocker   ml(JNICritical_lock);
+
   // Wait for _needs_gc  to be cleared
-  while (GC_locker::needs_gc()) {
+  while (needs_gc()) {
     JNICritical_lock->wait();
   }
 }
 
-void GC_locker::jni_lock_slow() {
+void GC_locker::jni_lock(JavaThread* thread) {
+  assert(!thread->in_critical(), "shouldn't currently be in a critical region");
   MutexLocker mu(JNICritical_lock);
   // Block entering threads if we know at least one thread is in a
   // JNI critical region and we need a GC.
   // We check that at least one thread is in a critical region before
   // blocking because blocked threads are woken up by a thread exiting
   // a JNI critical region.
-  while ((is_jni_active() && needs_gc()) || _doing_gc) {
+  while ((needs_gc() && is_jni_active()) || _doing_gc) {
     JNICritical_lock->wait();
   }
-  jni_lock();
+  thread->enter_critical();
+  _jni_lock_count++;
+  increment_debug_jni_lock_count();
 }
 
-void GC_locker::jni_unlock_slow() {
+void GC_locker::jni_unlock(JavaThread* thread) {
+  assert(thread->in_last_critical(), "should be exiting critical region");
   MutexLocker mu(JNICritical_lock);
-  jni_unlock();
+  _jni_lock_count--;
+  decrement_debug_jni_lock_count();
+  thread->exit_critical();
   if (needs_gc() && !is_jni_active()) {
     // We're the last thread out. Cause a GC to occur.
     // GC will also check is_active, so this check is not
@@ -74,11 +127,17 @@
       {
         // Must give up the lock while at a safepoint
         MutexUnlocker munlock(JNICritical_lock);
+        if (PrintJNIGCStalls && PrintGCDetails) {
+          ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
+          gclog_or_tty->print_cr("%.3f: Thread \"%s\" is performing GC after exiting critical section, %d locked",
+                                 gclog_or_tty->time_stamp().seconds(), Thread::current()->name(), _jni_lock_count);
+        }
         Universe::heap()->collect(GCCause::_gc_locker);
       }
       _doing_gc = false;
     }
-    clear_needs_gc();
+
+    _needs_gc = false;
     JNICritical_lock->notify_all();
   }
 }
--- a/src/share/vm/memory/gcLocker.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/gcLocker.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,53 +51,80 @@
 
 class GC_locker: public AllStatic {
  private:
-  static volatile jint _jni_lock_count;  // number of jni active instances
+  // The _jni_lock_count keeps track of the number of threads that are
+  // currently in a critical region.  It's only kept up to date when
+  // _needs_gc is true.  The current value is computed during
+  // safepointing and decremented during the slow path of GC_locker
+  // unlocking.
+  static volatile jint _jni_lock_count;  // number of jni active instances.
+
   static volatile jint _lock_count;      // number of other active instances
   static volatile bool _needs_gc;        // heap is filling, we need a GC
                                          // note: bool is typedef'd as jint
   static volatile bool _doing_gc;        // unlock_critical() is doing a GC
 
+#ifdef ASSERT
+  // This lock count is updated for all operations and is used to
+  // validate the jni_lock_count that is computed during safepoints.
+  static volatile jint _debug_jni_lock_count;
+#endif
+
   // Accessors
   static bool is_jni_active() {
+    assert(_needs_gc, "only valid when _needs_gc is set");
     return _jni_lock_count > 0;
   }
 
-  static void set_needs_gc() {
-    assert(SafepointSynchronize::is_at_safepoint(),
-      "needs_gc is only set at a safepoint");
-    _needs_gc = true;
-  }
-
-  static void clear_needs_gc() {
-    assert_lock_strong(JNICritical_lock);
-    _needs_gc = false;
-  }
+  // At a safepoint, visit all threads and count the number of active
+  // critical sections.  This is used to ensure that all active
+  // critical sections are exited before a new one is started.
+  static void verify_critical_count() NOT_DEBUG_RETURN;
 
-  static void jni_lock() {
-    Atomic::inc(&_jni_lock_count);
-    CHECK_UNHANDLED_OOPS_ONLY(
-      if (CheckUnhandledOops) { Thread::current()->_gc_locked_out_count++; })
-    assert(Universe::heap() == NULL || !Universe::heap()->is_gc_active(),
-           "locking failed");
-  }
+  static void jni_lock(JavaThread* thread);
+  static void jni_unlock(JavaThread* thread);
 
-  static void jni_unlock() {
-    Atomic::dec(&_jni_lock_count);
-    CHECK_UNHANDLED_OOPS_ONLY(
-      if (CheckUnhandledOops) { Thread::current()->_gc_locked_out_count--; })
+  static bool is_active_internal() {
+    verify_critical_count();
+    return _lock_count > 0 || _jni_lock_count > 0;
   }
 
-  static void jni_lock_slow();
-  static void jni_unlock_slow();
-
  public:
   // Accessors
-  static bool is_active();
+  static bool is_active() {
+    assert(_needs_gc || SafepointSynchronize::is_at_safepoint(), "only read at safepoint");
+    return is_active_internal();
+  }
   static bool needs_gc()       { return _needs_gc;                        }
+
   // Shorthand
-  static bool is_active_and_needs_gc() { return is_active() && needs_gc();}
+  static bool is_active_and_needs_gc() {
+    // Use is_active_internal since _needs_gc can change from true to
+    // false outside of a safepoint, triggering the assert in
+    // is_active.
+    return needs_gc() && is_active_internal();
+  }
 
-  // Calls set_needs_gc() if is_active() is true. Returns is_active().
+  // In debug mode track the locking state at all times
+  static void increment_debug_jni_lock_count() {
+#ifdef ASSERT
+    assert(_debug_jni_lock_count >= 0, "bad value");
+    Atomic::inc(&_debug_jni_lock_count);
+#endif
+  }
+  static void decrement_debug_jni_lock_count() {
+#ifdef ASSERT
+    assert(_debug_jni_lock_count > 0, "bad value");
+    Atomic::dec(&_debug_jni_lock_count);
+#endif
+  }
+
+  // Set the current lock count
+  static void set_jni_lock_count(int count) {
+    _jni_lock_count = count;
+    verify_critical_count();
+  }
+
+  // Sets _needs_gc if is_active() is true. Returns is_active().
   static bool check_active_before_gc();
 
   // Stalls the caller (who should not be in a jni critical section)
@@ -131,22 +158,24 @@
   // JNI critical regions are the only participants in this scheme
   // because they are, by spec, well bounded while in a critical region.
   //
-  // Each of the following two method is split into a fast path and a slow
-  // path. JNICritical_lock is only grabbed in the slow path.
+  // Each of the following two methods is split into a fast path and a
+  // slow path. JNICritical_lock is only grabbed in the slow path.
   // _needs_gc is initially false and every java thread will go
-  // through the fast path (which does the same thing as the slow path
-  // when _needs_gc is false). When GC happens at a safepoint,
-  // GC_locker::is_active() is checked. Since there is no safepoint in the
-  // fast path of lock_critical() and unlock_critical(), there is no race
-  // condition between the fast path and GC. After _needs_gc is set at a
-  // safepoint, every thread will go through the slow path after the safepoint.
-  // Since after a safepoint, each of the following two methods is either
-  // entered from the method entry and falls into the slow path, or is
-  // resumed from the safepoints in the method, which only exist in the slow
-  // path. So when _needs_gc is set, the slow path is always taken, till
-  // _needs_gc is cleared.
+  // through the fast path, which simply increments or decrements the
+  // current thread's critical count.  When GC happens at a safepoint,
+  // GC_locker::is_active() is checked. Since there is no safepoint in
+  // the fast path of lock_critical() and unlock_critical(), there is
+  // no race condition between the fast path and GC. After _needs_gc
+  // is set at a safepoint, every thread will go through the slow path
+  // after the safepoint.  Since after a safepoint, each of the
+  // following two methods is either entered from the method entry and
+  // falls into the slow path, or is resumed from the safepoints in
+  // the method, which only exist in the slow path. So when _needs_gc
+  // is set, the slow path is always taken, till _needs_gc is cleared.
   static void lock_critical(JavaThread* thread);
   static void unlock_critical(JavaThread* thread);
+
+  static address needs_gc_address() { return (address) &_needs_gc; }
 };
 
 
--- a/src/share/vm/memory/gcLocker.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/gcLocker.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,17 +27,6 @@
 
 #include "memory/gcLocker.hpp"
 
-inline bool GC_locker::is_active() {
-  return _lock_count > 0 || _jni_lock_count > 0;
-}
-
-inline bool GC_locker::check_active_before_gc() {
-  if (is_active()) {
-    set_needs_gc();
-  }
-  return is_active();
-}
-
 inline void GC_locker::lock() {
   // cast away volatile
   Atomic::inc(&_lock_count);
@@ -56,24 +45,28 @@
 
 inline void GC_locker::lock_critical(JavaThread* thread) {
   if (!thread->in_critical()) {
-    if (!needs_gc()) {
-      jni_lock();
-    } else {
-      jni_lock_slow();
+    if (needs_gc()) {
+      // jni_lock call calls enter_critical under the lock so that the
+      // global lock count and per thread count are in agreement.
+      jni_lock(thread);
+      return;
     }
+    increment_debug_jni_lock_count();
   }
   thread->enter_critical();
 }
 
 inline void GC_locker::unlock_critical(JavaThread* thread) {
+  if (thread->in_last_critical()) {
+    if (needs_gc()) {
+      // jni_unlock call calls exit_critical under the lock so that
+      // the global lock count and per thread count are in agreement.
+      jni_unlock(thread);
+      return;
+    }
+    decrement_debug_jni_lock_count();
+  }
   thread->exit_critical();
-  if (!thread->in_critical()) {
-    if (!needs_gc()) {
-      jni_unlock();
-    } else {
-      jni_unlock_slow();
-    }
-  }
 }
 
 #endif // SHARE_VM_MEMORY_GCLOCKER_INLINE_HPP
--- a/src/share/vm/memory/genCollectedHeap.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -479,11 +479,9 @@
 
   const size_t perm_prev_used = perm_gen()->used();
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_before_gc();
-    if (Verbose) {
-      gclog_or_tty->print_cr("GC Cause: %s", GCCause::to_string(gc_cause()));
-    }
+  print_heap_before_gc();
+  if (Verbose) {
+    gclog_or_tty->print_cr("GC Cause: %s", GCCause::to_string(gc_cause()));
   }
 
   {
@@ -685,9 +683,7 @@
   AdaptiveSizePolicy* sp = gen_policy()->size_policy();
   AdaptiveSizePolicyOutput(sp, total_collections());
 
-  if (PrintHeapAtGC) {
-    Universe::print_heap_after_gc();
-  }
+  print_heap_after_gc();
 
 #ifdef TRACESPINNING
   ParallelTaskTerminator::print_termination_counts();
@@ -703,7 +699,7 @@
   return collector_policy()->satisfy_failed_allocation(size, is_tlab);
 }
 
-void GenCollectedHeap::set_par_threads(int t) {
+void GenCollectedHeap::set_par_threads(uint t) {
   SharedHeap::set_par_threads(t);
   _gen_process_strong_tasks->set_n_threads(t);
 }
@@ -957,7 +953,7 @@
   return result;
 }
 
-// Returns "TRUE" iff "p" points into the allocated area of the heap.
+// Returns "TRUE" iff "p" points into the committed areas of the heap.
 bool GenCollectedHeap::is_in(const void* p) const {
   #ifndef ASSERT
   guarantee(VerifyBeforeGC   ||
@@ -1460,26 +1456,22 @@
 };
 
 jlong GenCollectedHeap::millis_since_last_gc() {
-  jlong now = os::javaTimeMillis();
+  // We need a monotonically non-decreasing time in ms but
+  // os::javaTimeMillis() does not guarantee monotonicity.
+  jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
   GenTimeOfLastGCClosure tolgc_cl(now);
   // iterate over generations getting the oldest
   // time that a generation was collected
   generation_iterate(&tolgc_cl, false);
   tolgc_cl.do_generation(perm_gen());
-  // XXX Despite the assert above, since javaTimeMillis()
-  // doesnot guarantee monotonically increasing return
-  // values (note, i didn't say "strictly monotonic"),
-  // we need to guard against getting back a time
-  // later than now. This should be fixed by basing
-  // on someting like gethrtime() which guarantees
-  // monotonicity. Note that cond_wait() is susceptible
-  // to a similar problem, because its interface is
-  // based on absolute time in the form of the
-  // system time's notion of UCT. See also 4506635
-  // for yet another problem of similar nature. XXX
+
+  // javaTimeNanos() is guaranteed to be monotonically non-decreasing
+  // provided the underlying platform provides such a time source
+  // (and it is bug free). So we still have to guard against getting
+  // back a time later than 'now'.
   jlong retVal = now - tolgc_cl.time();
   if (retVal < 0) {
-    NOT_PRODUCT(warning("time warp: %d", retVal);)
+    NOT_PRODUCT(warning("time warp: "INT64_FORMAT, retVal);)
     return 0;
   }
   return retVal;
--- a/src/share/vm/memory/genCollectedHeap.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -198,7 +198,7 @@
   // Mostly used for testing purposes. Caller does not hold the Heap_lock on entry.
   void collect(GCCause::Cause cause, int max_level);
 
-  // Returns "TRUE" iff "p" points into the allocated area of the heap.
+  // Returns "TRUE" iff "p" points into the committed areas of the heap.
   // The methods is_in(), is_in_closed_subset() and is_in_youngest() may
   // be expensive to compute in general, so, to prevent
   // their inadvertent use in product jvm's, we restrict their use to
@@ -419,8 +419,7 @@
   // asserted to be this type.
   static GenCollectedHeap* heap();
 
-  void set_par_threads(int t);
-
+  void set_par_threads(uint t);
 
   // Invoke the "do_oop" method of one of the closures "not_older_gens"
   // or "older_gens" on root locations for the generation at
--- a/src/share/vm/memory/genMarkSweep.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/genMarkSweep.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -176,7 +176,11 @@
 
   // Update time of last gc for all generations we collected
   // (which curently is all the generations in the heap).
-  gch->update_time_of_last_gc(os::javaTimeMillis());
+  // We need to use a monotonically non-decreasing time in ms
+  // or we will see time-warp warnings and os::javaTimeMillis()
+  // does not guarantee monotonicity.
+  jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
+  gch->update_time_of_last_gc(now);
 }
 
 void GenMarkSweep::allocate_stacks() {
@@ -254,7 +258,6 @@
 void GenMarkSweep::mark_sweep_phase1(int level,
                                   bool clear_all_softrefs) {
   // Recursively traverse all live objects and mark them
-  EventMark m("1 mark object");
   TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty);
   trace(" 1");
 
@@ -325,7 +328,6 @@
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   Generation* pg = gch->perm_gen();
 
-  EventMark m("2 compute new addresses");
   TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty);
   trace("2");
 
@@ -350,7 +352,6 @@
   Generation* pg = gch->perm_gen();
 
   // Adjust the pointers to reflect the new locations
-  EventMark m("3 adjust pointers");
   TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty);
   trace("3");
 
@@ -411,7 +412,6 @@
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   Generation* pg = gch->perm_gen();
 
-  EventMark m("4 compact heap");
   TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty);
   trace("4");
 
--- a/src/share/vm/memory/generation.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/generation.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -220,7 +220,7 @@
   MemRegion prev_used_region() const { return _prev_used_region; }
   virtual void  save_used_region()   { _prev_used_region = used_region(); }
 
-  // Returns "TRUE" iff "p" points into an allocated object in the generation.
+  // Returns "TRUE" iff "p" points into the committed areas in the generation.
   // For some kinds of generations, this may be an expensive operation.
   // To avoid performance problems stemming from its inadvertent use in
   // product jvm's, we restrict its use to assertion checking or
@@ -413,10 +413,13 @@
   // Time (in ms) when we were last collected or now if a collection is
   // in progress.
   virtual jlong time_of_last_gc(jlong now) {
-    // XXX See note in genCollectedHeap::millis_since_last_gc()
+    // Both _time_of_last_gc and now are set using a time source
+    // that guarantees monotonically non-decreasing values provided
+    // the underlying platform provides such a source. So we still
+    // have to guard against non-monotonicity.
     NOT_PRODUCT(
       if (now < _time_of_last_gc) {
-        warning("time warp: %d to %d", _time_of_last_gc, now);
+        warning("time warp: "INT64_FORMAT" to "INT64_FORMAT, _time_of_last_gc, now);
       }
     )
     return _time_of_last_gc;
--- a/src/share/vm/memory/referenceProcessor.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/referenceProcessor.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -43,7 +43,9 @@
 }
 
 void ReferenceProcessor::init_statics() {
-  jlong now = os::javaTimeMillis();
+  // We need a monotonically non-decreasing time in ms but
+  // os::javaTimeMillis() does not guarantee monotonicity.
+  jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
 
   // Initialize the soft ref timestamp clock.
   _soft_ref_timestamp_clock = now;
@@ -86,9 +88,9 @@
 
 ReferenceProcessor::ReferenceProcessor(MemRegion span,
                                        bool      mt_processing,
-                                       int       mt_processing_degree,
+                                       uint      mt_processing_degree,
                                        bool      mt_discovery,
-                                       int       mt_discovery_degree,
+                                       uint      mt_discovery_degree,
                                        bool      atomic_discovery,
                                        BoolObjectClosure* is_alive_non_header,
                                        bool      discovered_list_needs_barrier)  :
@@ -103,7 +105,7 @@
   _span = span;
   _discovery_is_atomic = atomic_discovery;
   _discovery_is_mt     = mt_discovery;
-  _num_q               = MAX2(1, mt_processing_degree);
+  _num_q               = MAX2(1U, mt_processing_degree);
   _max_num_q           = MAX2(_num_q, mt_discovery_degree);
   _discovered_refs     = NEW_C_HEAP_ARRAY(DiscoveredList,
                                           _max_num_q * number_of_subclasses_of_ref());
@@ -116,7 +118,7 @@
   _discoveredPhantomRefs = &_discoveredFinalRefs[_max_num_q];
 
   // Initialize all entries to NULL
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     _discovered_refs[i].set_head(NULL);
     _discovered_refs[i].set_length(0);
   }
@@ -131,7 +133,7 @@
 #ifndef PRODUCT
 void ReferenceProcessor::verify_no_references_recorded() {
   guarantee(!_discovering_refs, "Discovering refs?");
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     guarantee(_discovered_refs[i].is_empty(),
               "Found non-empty discovered list");
   }
@@ -139,7 +141,7 @@
 #endif
 
 void ReferenceProcessor::weak_oops_do(OopClosure* f) {
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     if (UseCompressedOops) {
       f->do_oop((narrowOop*)_discovered_refs[i].adr_head());
     } else {
@@ -151,7 +153,10 @@
 void ReferenceProcessor::update_soft_ref_master_clock() {
   // Update (advance) the soft ref master clock field. This must be done
   // after processing the soft ref list.
-  jlong now = os::javaTimeMillis();
+
+  // We need a monotonically non-decreasing time in ms but
+  // os::javaTimeMillis() does not guarantee monotonicity.
+  jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
   jlong soft_ref_clock = java_lang_ref_SoftReference::clock();
   assert(soft_ref_clock == _soft_ref_timestamp_clock, "soft ref clocks out of sync");
 
@@ -161,10 +166,11 @@
             _soft_ref_timestamp_clock, now);
   }
   )
-  // In product mode, protect ourselves from system time being adjusted
-  // externally and going backward; see note in the implementation of
-  // GenCollectedHeap::time_since_last_gc() for the right way to fix
-  // this uniformly throughout the VM; see bug-id 4741166. XXX
+  // The values of now and _soft_ref_timestamp_clock are set using
+  // javaTimeNanos(), which is guaranteed to be monotonically
+  // non-decreasing provided the underlying platform provides such
+  // a time source (and it is bug free).
+  // In product mode, however, protect ourselves from non-monotonicity.
   if (now > _soft_ref_timestamp_clock) {
     _soft_ref_timestamp_clock = now;
     java_lang_ref_SoftReference::set_clock(now);
@@ -431,7 +437,7 @@
     task_executor->execute(tsk);
   } else {
     // Serial code: call the parent class's implementation
-    for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+    for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
       enqueue_discovered_reflist(_discovered_refs[i], pending_list_addr);
       _discovered_refs[i].set_head(NULL);
       _discovered_refs[i].set_length(0);
@@ -690,7 +696,7 @@
 
 void ReferenceProcessor::abandon_partial_discovery() {
   // loop over the lists
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) {
       gclog_or_tty->print_cr("\nAbandoning %s discovered list", list_name(i));
     }
@@ -781,7 +787,7 @@
     gclog_or_tty->print_cr("\nBalance ref_lists ");
   }
 
-  for (int i = 0; i < _max_num_q; ++i) {
+  for (uint i = 0; i < _max_num_q; ++i) {
     total_refs += ref_lists[i].length();
     if (TraceReferenceGC && PrintGCDetails) {
       gclog_or_tty->print("%d ", ref_lists[i].length());
@@ -791,8 +797,8 @@
     gclog_or_tty->print_cr(" = %d", total_refs);
   }
   size_t avg_refs = total_refs / _num_q + 1;
-  int to_idx = 0;
-  for (int from_idx = 0; from_idx < _max_num_q; from_idx++) {
+  uint to_idx = 0;
+  for (uint from_idx = 0; from_idx < _max_num_q; from_idx++) {
     bool move_all = false;
     if (from_idx >= _num_q) {
       move_all = ref_lists[from_idx].length() > 0;
@@ -851,7 +857,7 @@
   }
 #ifdef ASSERT
   size_t balanced_total_refs = 0;
-  for (int i = 0; i < _max_num_q; ++i) {
+  for (uint i = 0; i < _max_num_q; ++i) {
     balanced_total_refs += ref_lists[i].length();
     if (TraceReferenceGC && PrintGCDetails) {
       gclog_or_tty->print("%d ", ref_lists[i].length());
@@ -897,7 +903,7 @@
   }
   if (PrintReferenceGC && PrintGCDetails) {
     size_t total = 0;
-    for (int i = 0; i < _max_num_q; ++i) {
+    for (uint i = 0; i < _max_num_q; ++i) {
       total += refs_lists[i].length();
     }
     gclog_or_tty->print(", %u refs", total);
@@ -913,7 +919,7 @@
       RefProcPhase1Task phase1(*this, refs_lists, policy, true /*marks_oops_alive*/);
       task_executor->execute(phase1);
     } else {
-      for (int i = 0; i < _max_num_q; i++) {
+      for (uint i = 0; i < _max_num_q; i++) {
         process_phase1(refs_lists[i], policy,
                        is_alive, keep_alive, complete_gc);
       }
@@ -929,7 +935,7 @@
     RefProcPhase2Task phase2(*this, refs_lists, !discovery_is_atomic() /*marks_oops_alive*/);
     task_executor->execute(phase2);
   } else {
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       process_phase2(refs_lists[i], is_alive, keep_alive, complete_gc);
     }
   }
@@ -940,7 +946,7 @@
     RefProcPhase3Task phase3(*this, refs_lists, clear_referent, true /*marks_oops_alive*/);
     task_executor->execute(phase3);
   } else {
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       process_phase3(refs_lists[i], clear_referent,
                      is_alive, keep_alive, complete_gc);
     }
@@ -949,7 +955,7 @@
 
 void ReferenceProcessor::clean_up_discovered_references() {
   // loop over the lists
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) {
       gclog_or_tty->print_cr(
         "\nScrubbing %s discovered list of Null referents",
@@ -994,7 +1000,7 @@
 }
 
 inline DiscoveredList* ReferenceProcessor::get_discovered_list(ReferenceType rt) {
-  int id = 0;
+  uint id = 0;
   // Determine the queue index to use for this object.
   if (_discovery_is_mt) {
     // During a multi-threaded discovery phase,
@@ -1276,7 +1282,7 @@
   {
     TraceTime tt("Preclean SoftReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1289,7 +1295,7 @@
   {
     TraceTime tt("Preclean WeakReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1302,7 +1308,7 @@
   {
     TraceTime tt("Preclean FinalReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1315,7 +1321,7 @@
   {
     TraceTime tt("Preclean PhantomReferences", PrintGCDetails && PrintReferenceGC,
               false, gclog_or_tty);
-    for (int i = 0; i < _max_num_q; i++) {
+    for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
       }
@@ -1380,7 +1386,7 @@
   )
 }
 
-const char* ReferenceProcessor::list_name(int i) {
+const char* ReferenceProcessor::list_name(uint i) {
    assert(i >= 0 && i <= _max_num_q * number_of_subclasses_of_ref(),
           "Out of bounds index");
 
@@ -1404,7 +1410,7 @@
 #ifndef PRODUCT
 void ReferenceProcessor::clear_discovered_references() {
   guarantee(!_discovering_refs, "Discovering refs?");
-  for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
+  for (uint i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) {
     clear_discovered_references(_discovered_refs[i]);
   }
 }
--- a/src/share/vm/memory/referenceProcessor.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/referenceProcessor.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -231,7 +231,7 @@
   bool        _enqueuing_is_done;       // true if all weak references enqueued
   bool        _processing_is_mt;        // true during phases when
                                         // reference processing is MT.
-  int         _next_id;                 // round-robin mod _num_q counter in
+  uint        _next_id;                 // round-robin mod _num_q counter in
                                         // support of work distribution
 
   // For collectors that do not keep GC liveness information
@@ -252,9 +252,9 @@
   // The discovered ref lists themselves
 
   // The active MT'ness degree of the queues below
-  int             _num_q;
+  uint             _num_q;
   // The maximum MT'ness degree of the queues below
-  int             _max_num_q;
+  uint             _max_num_q;
 
   // Master array of discovered oops
   DiscoveredList* _discovered_refs;
@@ -268,9 +268,9 @@
  public:
   static int number_of_subclasses_of_ref() { return (REF_PHANTOM - REF_OTHER); }
 
-  int num_q()                              { return _num_q; }
-  int max_num_q()                          { return _max_num_q; }
-  void set_active_mt_degree(int v)         { _num_q = v; }
+  uint num_q()                             { return _num_q; }
+  uint max_num_q()                         { return _max_num_q; }
+  void set_active_mt_degree(uint v)        { _num_q = v; }
 
   DiscoveredList* discovered_refs()        { return _discovered_refs; }
 
@@ -368,7 +368,7 @@
 
   // Returns the name of the discovered reference list
   // occupying the i / _num_q slot.
-  const char* list_name(int i);
+  const char* list_name(uint i);
 
   void enqueue_discovered_reflists(HeapWord* pending_list_addr, AbstractRefProcTaskExecutor* task_executor);
 
@@ -388,8 +388,8 @@
                                    YieldClosure*      yield);
 
   // round-robin mod _num_q (not: _not_ mode _max_num_q)
-  int next_id() {
-    int id = _next_id;
+  uint next_id() {
+    uint id = _next_id;
     if (++_next_id == _num_q) {
       _next_id = 0;
     }
@@ -434,8 +434,8 @@
 
   // Default parameters give you a vanilla reference processor.
   ReferenceProcessor(MemRegion span,
-                     bool mt_processing = false, int mt_processing_degree = 1,
-                     bool mt_discovery  = false, int mt_discovery_degree  = 1,
+                     bool mt_processing = false, uint mt_processing_degree = 1,
+                     bool mt_discovery  = false, uint mt_discovery_degree  = 1,
                      bool atomic_discovery = true,
                      BoolObjectClosure* is_alive_non_header = NULL,
                      bool discovered_list_needs_barrier = false);
--- a/src/share/vm/memory/sharedHeap.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/sharedHeap.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -94,7 +94,7 @@
              && _thread_holds_heap_lock_for_gc);
 }
 
-void SharedHeap::set_par_threads(int t) {
+void SharedHeap::set_par_threads(uint t) {
   assert(t == 0 || !UseSerialGC, "Cannot have parallel threads");
   _n_par_threads = t;
   _process_strong_tasks->set_n_threads(t);
--- a/src/share/vm/memory/sharedHeap.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/sharedHeap.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -287,7 +287,7 @@
 
   // Sets the number of parallel threads that will be doing tasks
   // (such as process strong roots) subsequently.
-  virtual void set_par_threads(int t);
+  virtual void set_par_threads(uint t);
 
   int n_termination();
   void set_n_termination(int t);
--- a/src/share/vm/memory/space.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/space.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -304,11 +304,6 @@
   CompactibleSpace::clear(mangle_space);
 }
 
-bool Space::is_in(const void* p) const {
-  HeapWord* b = block_start_const(p);
-  return b != NULL && block_is_obj(b);
-}
-
 bool ContiguousSpace::is_in(const void* p) const {
   return _bottom <= p && p < _top;
 }
--- a/src/share/vm/memory/space.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/space.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -187,7 +187,7 @@
   // expensive operation. To prevent performance problems
   // on account of its inadvertent use in product jvm's,
   // we restrict its use to assertion checks only.
-  virtual bool is_in(const void* p) const;
+  virtual bool is_in(const void* p) const = 0;
 
   // Returns true iff the given reserved memory of the space contains the
   // given address.
--- a/src/share/vm/memory/universe.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/universe.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1303,22 +1303,22 @@
   }
 }
 
-void Universe::print_heap_before_gc(outputStream* st) {
+void Universe::print_heap_before_gc(outputStream* st, bool ignore_extended) {
   st->print_cr("{Heap before GC invocations=%u (full %u):",
                heap()->total_collections(),
                heap()->total_full_collections());
-  if (!PrintHeapAtGCExtended) {
+  if (!PrintHeapAtGCExtended || ignore_extended) {
     heap()->print_on(st);
   } else {
     heap()->print_extended_on(st);
   }
 }
 
-void Universe::print_heap_after_gc(outputStream* st) {
+void Universe::print_heap_after_gc(outputStream* st, bool ignore_extended) {
   st->print_cr("Heap after GC invocations=%u (full %u):",
                heap()->total_collections(),
                heap()->total_full_collections());
-  if (!PrintHeapAtGCExtended) {
+  if (!PrintHeapAtGCExtended || ignore_extended) {
     heap()->print_on(st);
   } else {
     heap()->print_extended_on(st);
--- a/src/share/vm/memory/universe.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/memory/universe.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -424,8 +424,8 @@
   static void print_heap_at_SIGBREAK();
   static void print_heap_before_gc() { print_heap_before_gc(gclog_or_tty); }
   static void print_heap_after_gc()  { print_heap_after_gc(gclog_or_tty); }
-  static void print_heap_before_gc(outputStream* st);
-  static void print_heap_after_gc(outputStream* st);
+  static void print_heap_before_gc(outputStream* st, bool ignore_extended = false);
+  static void print_heap_after_gc(outputStream* st, bool ignore_extended = false);
 
   // Change the number of dummy objects kept reachable by the full gc dummy
   // array; this should trigger relocation in a sliding compaction collector.
--- a/src/share/vm/oops/arrayKlass.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/arrayKlass.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -73,7 +73,7 @@
   oop* adr_component_mirror()           { return (oop*)&this->_component_mirror;}
 
   // Compiler/Interpreter offset
-  static ByteSize component_mirror_offset() { return byte_offset_of(arrayKlass, _component_mirror); }
+  static ByteSize component_mirror_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(arrayKlass, _component_mirror)); }
 
   virtual klassOop java_super() const;//{ return SystemDictionary::Object_klass(); }
 
--- a/src/share/vm/oops/arrayOop.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/arrayOop.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #ifndef PRODUCT
 
 #include "oops/arrayOop.hpp"
+#include "oops/oop.inline.hpp"
 #include "utilities/globalDefinitions.hpp"
 
 bool arrayOopDesc::check_max_length_overflow(BasicType type) {
@@ -38,9 +39,7 @@
   return (julong)(size_t)bytes == bytes;
 }
 
-bool arrayOopDesc::test_max_array_length() {
-  tty->print_cr("test_max_array_length");
-
+void arrayOopDesc::test_max_array_length() {
   assert(check_max_length_overflow(T_BOOLEAN), "size_t overflow for boolean array");
   assert(check_max_length_overflow(T_CHAR), "size_t overflow for char array");
   assert(check_max_length_overflow(T_FLOAT), "size_t overflow for float array");
@@ -54,8 +53,6 @@
   assert(check_max_length_overflow(T_NARROWOOP), "size_t overflow for narrowOop array");
 
   // T_VOID and T_ADDRESS are not supported by max_array_length()
-
-  return true;
 }
 
 
--- a/src/share/vm/oops/arrayOop.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/arrayOop.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -128,7 +128,7 @@
 #ifndef PRODUCT
   static bool check_max_length_overflow(BasicType type);
   static int32_t old_max_array_length(BasicType type);
-  static bool test_max_array_length();
+  static void test_max_array_length();
 #endif
 };
 
--- a/src/share/vm/oops/constantPoolOop.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/constantPoolOop.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -269,7 +269,7 @@
 methodOop constantPoolOopDesc::method_at_if_loaded(constantPoolHandle cpool,
                                                    int which, Bytecodes::Code invoke_code) {
   assert(!constantPoolCacheOopDesc::is_secondary_index(which), "no indy instruction here");
-  if (cpool->cache() == NULL)  return false;  // nothing to load yet
+  if (cpool->cache() == NULL)  return NULL;  // nothing to load yet
   int cache_index = which - CPCACHE_INDEX_TAG;
   if (!(cache_index >= 0 && cache_index < cpool->cache()->length())) {
     if (PrintMiscellaneous && (Verbose||WizardMode)) {
@@ -346,7 +346,6 @@
   int cpc_index = operand;
   DEBUG_ONLY(cpc_index -= CPCACHE_INDEX_TAG);
   assert((int)(u2)cpc_index == cpc_index, "clean u2");
-  assert(cache() != NULL, "cache not null, maybe class is resolved but not rewritten yet");
   int member_index = cache()->entry_at(cpc_index)->constant_pool_index();
   return member_index;
 }
--- a/src/share/vm/oops/instanceKlass.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/instanceKlass.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -208,7 +208,7 @@
   // abort if someone beat us to the initialization
   if (!this_oop->is_not_initialized()) return;  // note: not equivalent to is_initialized()
 
-  ClassState old_state = this_oop->_init_state;
+  ClassState old_state = this_oop->init_state();
   link_class_impl(this_oop, true, THREAD);
   if (HAS_PENDING_EXCEPTION) {
     CLEAR_PENDING_EXCEPTION;
@@ -2479,7 +2479,7 @@
   bool good_state = as_klassOop()->is_shared() ? (_init_state <= state)
                                                : (_init_state < state);
   assert(good_state || state == allocated, "illegal state transition");
-  _init_state = state;
+  _init_state = (u1)state;
 }
 #endif
 
--- a/src/share/vm/oops/instanceKlass.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/instanceKlass.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -227,20 +227,19 @@
   // (including inherited fields but after header_size()).
   int             _nonstatic_field_size;
   int             _static_field_size;    // number words used by static fields (oop and non-oop) in this klass
-  int             _static_oop_field_count;// number of static oop fields in this klass
+  u2              _static_oop_field_count;// number of static oop fields in this klass
+  u2              _java_fields_count;    // The number of declared Java fields
   int             _nonstatic_oop_map_size;// size in words of nonstatic oop map blocks
-  int             _java_fields_count;    // The number of declared Java fields
+
   bool            _is_marked_dependent;  // used for marking during flushing and deoptimization
   bool            _rewritten;            // methods rewritten.
   bool            _has_nonstatic_fields; // for sizing with UseCompressedOops
   bool            _should_verify_class;  // allow caching of preverification
   u2              _minor_version;        // minor version number of class file
   u2              _major_version;        // major version number of class file
-  ClassState      _init_state;           // state of class
   Thread*         _init_thread;          // Pointer to current thread doing initialization (to handle recusive initialization)
   int             _vtable_len;           // length of Java vtable (in words)
   int             _itable_len;           // length of Java itable (in words)
-  ReferenceType   _reference_type;       // reference type
   OopMapCache*    volatile _oop_map_cache;   // OopMapCache for all methods in the klass (allocated lazily)
   JNIid*          _jni_ids;              // First JNI identifier for static fields in this class
   jmethodID*      _methods_jmethod_ids;  // jmethodIDs corresponding to method_idnum, or NULL if none
@@ -260,6 +259,13 @@
   JvmtiCachedClassFieldMap* _jvmti_cached_class_field_map;  // JVMTI: used during heap iteration
   volatile u2     _idnum_allocated_count;         // JNI/JVMTI: increments with the addition of methods, old ids don't change
 
+  // Class states are defined as ClassState (see above).
+  // Place the _init_state here to utilize the unused 2 bytes after
+  // _idnum_allocated_count.
+  u1              _init_state;                    // state of class
+
+  u1              _reference_type;                // reference type
+
   // embedded Java vtable follows here
   // embedded Java itables follows here
   // embedded static fields follows here
@@ -279,8 +285,8 @@
   int static_field_size() const            { return _static_field_size; }
   void set_static_field_size(int size)     { _static_field_size = size; }
 
-  int static_oop_field_count() const        { return _static_oop_field_count; }
-  void set_static_oop_field_count(int size) { _static_oop_field_count = size; }
+  int static_oop_field_count() const       { return (int)_static_oop_field_count; }
+  void set_static_oop_field_count(u2 size) { _static_oop_field_count = size; }
 
   // Java vtable
   int  vtable_length() const               { return _vtable_len; }
@@ -320,14 +326,14 @@
   Symbol* field_signature   (int index) const { return field(index)->signature(constants()); }
 
   // Number of Java declared fields
-  int java_fields_count() const           { return _java_fields_count; }
+  int java_fields_count() const           { return (int)_java_fields_count; }
 
   // Number of fields including any injected fields
   int all_fields_count() const            { return _fields->length() / sizeof(FieldInfo::field_slots); }
 
   typeArrayOop fields() const              { return _fields; }
 
-  void set_fields(typeArrayOop f, int java_fields_count) {
+  void set_fields(typeArrayOop f, u2 java_fields_count) {
     oop_store_without_check((oop*) &_fields, (oop) f);
     _java_fields_count = java_fields_count;
   }
@@ -377,7 +383,7 @@
   bool is_being_initialized() const        { return _init_state == being_initialized; }
   bool is_in_error_state() const           { return _init_state == initialization_error; }
   bool is_reentrant_initialization(Thread *thread)  { return thread == _init_thread; }
-  int  get_init_state()                    { return _init_state; } // Useful for debugging
+  ClassState  init_state()                 { return (ClassState)_init_state; }
   bool is_rewritten() const                { return _rewritten; }
 
   // defineClass specified verification
@@ -402,10 +408,13 @@
   void eager_initialize(Thread *thread);
 
   // reference type
-  ReferenceType reference_type() const     { return _reference_type; }
-  void set_reference_type(ReferenceType t) { _reference_type = t; }
+  ReferenceType reference_type() const     { return (ReferenceType)_reference_type; }
+  void set_reference_type(ReferenceType t) {  // stores t in the u1-sized _reference_type field
+    assert(t == (u1)t, "overflow");  // t must round-trip through u1, or the cast below would truncate it
+    _reference_type = (u1)t;
+  }
 
-  static int reference_type_offset_in_bytes() { return offset_of(instanceKlass, _reference_type); }
+  static ByteSize reference_type_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _reference_type)); }
 
   // find local field, returns true if found
   bool find_local_field(Symbol* name, Symbol* sig, fieldDescriptor* fd) const;
@@ -565,9 +574,9 @@
   void set_method_annotations_of(int idnum, typeArrayOop anno)
                                                 { set_methods_annotations_of(idnum, anno, &_methods_annotations); }
   void set_method_parameter_annotations_of(int idnum, typeArrayOop anno)
-                                                { set_methods_annotations_of(idnum, anno, &_methods_annotations); }
+                                                { set_methods_annotations_of(idnum, anno, &_methods_parameter_annotations); }
   void set_method_default_annotations_of(int idnum, typeArrayOop anno)
-                                                { set_methods_annotations_of(idnum, anno, &_methods_annotations); }
+                                                { set_methods_annotations_of(idnum, anno, &_methods_default_annotations); }
 
   // allocation
   DEFINE_ALLOCATE_PERMANENT(instanceKlass);
@@ -616,8 +625,8 @@
   void set_breakpoints(BreakpointInfo* bps) { _breakpoints = bps; };
 
   // support for stub routines
-  static int init_state_offset_in_bytes()    { return offset_of(instanceKlass, _init_state); }
-  static int init_thread_offset_in_bytes()   { return offset_of(instanceKlass, _init_thread); }
+  static ByteSize init_state_offset()  { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_state)); }
+  static ByteSize init_thread_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_thread)); }
 
   // subclass/subinterface checks
   bool implements_interface(klassOop k) const;
@@ -754,7 +763,7 @@
 #ifdef ASSERT
   void set_init_state(ClassState state);
 #else
-  void set_init_state(ClassState state) { _init_state = state; }
+  void set_init_state(ClassState state) { _init_state = (u1)state; }
 #endif
   void set_rewritten()                  { _rewritten = true; }
   void set_init_thread(Thread *thread)  { _init_thread = thread; }
--- a/src/share/vm/oops/klass.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/klass.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -144,11 +144,13 @@
     }
     kl->set_secondary_supers(NULL);
     oop_store_without_check((oop*) &kl->_primary_supers[0], k);
-    kl->set_super_check_offset(primary_supers_offset_in_bytes() + sizeof(oopDesc));
+    kl->set_super_check_offset(in_bytes(primary_supers_offset()));
   }
 
   kl->set_java_mirror(NULL);
+#ifdef GRAAL
   kl->set_graal_mirror(NULL);
+#endif
   kl->set_modifier_flags(0);
   kl->set_layout_helper(Klass::_lh_neutral_value);
   kl->set_name(NULL);
@@ -159,6 +161,7 @@
   kl->set_next_sibling(NULL);
   kl->set_alloc_count(0);
   kl->set_alloc_size(0);
+  TRACE_SET_KLASS_TRACE_ID(kl, 0);
 
   kl->set_prototype_header(markOopDesc::prototype());
   kl->set_biased_lock_revocation_count(0);
--- a/src/share/vm/oops/klass.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/klass.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -33,6 +33,7 @@
 #include "oops/klassPS.hpp"
 #include "oops/oop.hpp"
 #include "runtime/orderAccess.hpp"
+#include "trace/traceMacros.hpp"
 #include "utilities/accessFlags.hpp"
 #ifndef SERIALGC
 #include "gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp"
@@ -80,6 +81,7 @@
 //    [last_biased_lock_bulk_revocation_time] (64 bits)
 //    [prototype_header]
 //    [biased_lock_revocation_count]
+//    [trace_id]
 
 
 // Forward declarations.
@@ -265,6 +267,7 @@
   markOop  _prototype_header;   // Used when biased locking is both enabled and disabled for this type
   jint     _biased_lock_revocation_count;
 
+  TRACE_DEFINE_KLASS_TRACE_ID;
  public:
 
   // returns the enclosing klassOop
@@ -315,7 +318,7 @@
   // Can this klass be a primary super?  False for interfaces and arrays of
   // interfaces.  False also for arrays or classes with long super chains.
   bool can_be_primary_super() const {
-    const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc);
+    const juint secondary_offset = in_bytes(secondary_super_cache_offset());
     return super_check_offset() != secondary_offset;
   }
   virtual bool can_be_primary_super_slow() const;
@@ -325,7 +328,7 @@
     if (!can_be_primary_super()) {
       return primary_super_limit();
     } else {
-      juint d = (super_check_offset() - (primary_supers_offset_in_bytes() + sizeof(oopDesc))) / sizeof(klassOop);
+      juint d = (super_check_offset() - in_bytes(primary_supers_offset())) / sizeof(klassOop);
       assert(d < primary_super_limit(), "oob");
       assert(_primary_supers[d] == as_klassOop(), "proper init");
       return d;
@@ -380,16 +383,16 @@
   virtual void set_alloc_size(juint n) = 0;
 
   // Compiler support
-  static int super_offset_in_bytes()         { return offset_of(Klass, _super); }
-  static int super_check_offset_offset_in_bytes() { return offset_of(Klass, _super_check_offset); }
-  static int primary_supers_offset_in_bytes(){ return offset_of(Klass, _primary_supers); }
-  static int secondary_super_cache_offset_in_bytes() { return offset_of(Klass, _secondary_super_cache); }
-  static int secondary_supers_offset_in_bytes() { return offset_of(Klass, _secondary_supers); }
-  static int java_mirror_offset_in_bytes()   { return offset_of(Klass, _java_mirror); }
-  static int graal_mirror_offset_in_bytes()  { return offset_of(Klass, _graal_mirror); }
-  static int modifier_flags_offset_in_bytes(){ return offset_of(Klass, _modifier_flags); }
-  static int layout_helper_offset_in_bytes() { return offset_of(Klass, _layout_helper); }
-  static int access_flags_offset_in_bytes()  { return offset_of(Klass, _access_flags); }
+  static ByteSize super_offset()                 { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super)); }
+  static ByteSize super_check_offset_offset()    { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super_check_offset)); }
+  static ByteSize primary_supers_offset()        { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _primary_supers)); }
+  static ByteSize secondary_super_cache_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_super_cache)); }
+  static ByteSize secondary_supers_offset()      { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_supers)); }
+  static ByteSize java_mirror_offset()           { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _java_mirror)); }
+  static ByteSize modifier_flags_offset()        { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _modifier_flags)); }
+  static ByteSize layout_helper_offset()         { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _layout_helper)); }
+  static ByteSize access_flags_offset()          { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _access_flags)); }
+  static ByteSize graal_mirror_offset()          { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _graal_mirror)); }
 
   // Unpacking layout_helper:
   enum {
@@ -486,7 +489,7 @@
   bool is_subtype_of(klassOop k) const {
     juint    off = k->klass_part()->super_check_offset();
     klassOop sup = *(klassOop*)( (address)as_klassOop() + off );
-    const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc);
+    const juint secondary_offset = in_bytes(secondary_super_cache_offset());
     if (sup == k) {
       return true;
     } else if (off != secondary_offset) {
@@ -682,7 +685,7 @@
   // are potential problems in setting the bias pattern for
   // JVM-internal oops.
   inline void set_prototype_header(markOop header);
-  static int prototype_header_offset_in_bytes() { return offset_of(Klass, _prototype_header); }
+  static ByteSize prototype_header_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _prototype_header)); }
 
   int  biased_lock_revocation_count() const { return (int) _biased_lock_revocation_count; }
   // Atomically increments biased_lock_revocation_count and returns updated value
@@ -691,6 +694,7 @@
   jlong last_biased_lock_bulk_revocation_time() { return _last_biased_lock_bulk_revocation_time; }
   void  set_last_biased_lock_bulk_revocation_time(jlong cur_time) { _last_biased_lock_bulk_revocation_time = cur_time; }
 
+  TRACE_DEFINE_KLASS_METHODS;
 
   // garbage collection support
   virtual void follow_weak_klass_links(
--- a/src/share/vm/oops/klassKlass.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/klassKlass.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -73,7 +73,9 @@
   MarkSweep::mark_and_push(k->adr_secondary_super_cache());
   MarkSweep::mark_and_push(k->adr_secondary_supers());
   MarkSweep::mark_and_push(k->adr_java_mirror());
+#ifdef GRAAL
   MarkSweep::mark_and_push(k->adr_graal_mirror());
+#endif
   // We follow the subklass and sibling links at the end of the
   // marking phase, since otherwise following them will prevent
   // class unloading (all classes are transitively linked from
@@ -93,7 +95,9 @@
   PSParallelCompact::mark_and_push(cm, k->adr_secondary_super_cache());
   PSParallelCompact::mark_and_push(cm, k->adr_secondary_supers());
   PSParallelCompact::mark_and_push(cm, k->adr_java_mirror());
+#ifdef GRAAL
   PSParallelCompact::mark_and_push(cm, k->adr_graal_mirror());
+#endif
   // We follow the subklass and sibling links at the end of the
   // marking phase, since otherwise following them will prevent
   // class unloading (all classes are transitively linked from
@@ -113,7 +117,9 @@
   blk->do_oop(k->adr_secondary_super_cache());
   blk->do_oop(k->adr_secondary_supers());
   blk->do_oop(k->adr_java_mirror());
+#ifdef GRAAL
   blk->do_oop(k->adr_graal_mirror());
+#endif
   // The following are in the perm gen and are treated
   // specially in a later phase of a perm gen collection; ...
   assert(oop(k)->is_perm(), "should be in perm");
@@ -147,8 +153,10 @@
   if (mr.contains(adr)) blk->do_oop(adr);
   adr = k->adr_java_mirror();
   if (mr.contains(adr)) blk->do_oop(adr);
+#ifdef GRAAL
   adr = k->adr_graal_mirror();
   if (mr.contains(adr)) blk->do_oop(adr);
+#endif
   // The following are "weak links" in the perm gen and are
   // treated specially in a later phase of a perm gen collection.
   assert(oop(k)->is_perm(), "should be in perm");
@@ -177,7 +185,9 @@
   MarkSweep::adjust_pointer(k->adr_secondary_super_cache());
   MarkSweep::adjust_pointer(k->adr_secondary_supers());
   MarkSweep::adjust_pointer(k->adr_java_mirror());
+#ifdef GRAAL
   MarkSweep::adjust_pointer(k->adr_graal_mirror());
+#endif
   MarkSweep::adjust_pointer(k->adr_subklass());
   MarkSweep::adjust_pointer(k->adr_next_sibling());
   return size;
--- a/src/share/vm/oops/klassOop.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/klassOop.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -38,14 +38,8 @@
 
 class klassOopDesc : public oopDesc {
  public:
-  // size operation
-  static int header_size()                       { return sizeof(klassOopDesc)/HeapWordSize; }
-
-  // support for code generation
-  static int klass_part_offset_in_bytes()        { return sizeof(klassOopDesc); }
-
   // returns the Klass part containing dispatching behavior
-  Klass* klass_part() const                      { return (Klass*)((address)this + klass_part_offset_in_bytes()); }
+  Klass* klass_part() const                      { return (Klass*)((address)this + sizeof(klassOopDesc)); }
 
   // Convenience wrapper
   inline oop java_mirror() const;
--- a/src/share/vm/oops/methodDataKlass.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/methodDataKlass.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -35,7 +35,6 @@
 #include "oops/oop.inline2.hpp"
 #include "runtime/handles.inline.hpp"
 #ifndef SERIALGC
-#include "gc_implementation/parallelScavenge/psPromotionManager.inline.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.inline.hpp"
 #include "oops/oop.pcgc.inline.hpp"
 #endif
@@ -85,7 +84,9 @@
 
   obj->follow_header();
   MarkSweep::mark_and_push(m->adr_method());
+#ifdef GRAAL
   MarkSweep::mark_and_push(m->adr_graal_mirror());
+#endif
   ResourceMark rm;
   for (ProfileData* data = m->first_data();
        m->is_valid(data);
@@ -102,7 +103,9 @@
 
   obj->follow_header(cm);
   PSParallelCompact::mark_and_push(cm, m->adr_method());
+#ifdef GRAAL
   PSParallelCompact::mark_and_push(cm, m->adr_graal_mirror());
+#endif
   ResourceMark rm;
   for (ProfileData* data = m->first_data();
        m->is_valid(data);
@@ -122,7 +125,9 @@
 
   obj->oop_iterate_header(blk);
   blk->do_oop(m->adr_method());
+#ifdef GRAAL
   blk->do_oop(m->adr_graal_mirror());
+#endif
   ResourceMark rm;
   for (ProfileData* data = m->first_data();
        m->is_valid(data);
@@ -144,11 +149,12 @@
   if (mr.contains(adr)) {
     blk->do_oop(m->adr_method());
   }
+#ifdef GRAAL
   adr = m->adr_graal_mirror();
   if(mr.contains(adr)) {
     blk->do_oop(m->adr_graal_mirror());
   }
-
+#endif
   ResourceMark rm;
   for (ProfileData* data = m->first_data();
        m->is_valid(data);
@@ -167,7 +173,9 @@
 
   obj->adjust_header();
   MarkSweep::adjust_pointer(m->adr_method());
+#ifdef GRAAL
   MarkSweep::adjust_pointer(m->adr_graal_mirror());
+#endif
   ResourceMark rm;
   ProfileData* data;
   for (data = m->first_data(); m->is_valid(data); data = m->next_data(data)) {
@@ -183,11 +191,12 @@
   methodDataOop m = methodDataOop(obj);
   // This should never point into the young gen.
   assert(!PSScavenge::should_scavenge(m->adr_method()), "Sanity");
- 
+#ifdef GRAAL
   oop* adr = m->adr_graal_mirror();
   if(PSScavenge::should_scavenge(adr)) {
     pm->claim_or_forward_depth(adr);
   }
+#endif
 }
 
 int methodDataKlass::oop_update_pointers(ParCompactionManager* cm, oop obj) {
@@ -195,7 +204,9 @@
   methodDataOop m = methodDataOop(obj);
 
   PSParallelCompact::adjust_pointer(m->adr_method());
+#ifdef GRAAL
   PSParallelCompact::adjust_pointer(m->adr_graal_mirror());
+#endif
 
   ResourceMark rm;
   ProfileData* data;
--- a/src/share/vm/oops/methodDataOop.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/methodDataOop.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -761,7 +761,9 @@
   ResourceMark rm;
   // Set the method back-pointer.
   _method = method();
+#ifdef GRAAL
   _graal_mirror = NULL;
+#endif
 
   if (TieredCompilation) {
     _invocation_counter.init();
--- a/src/share/vm/oops/methodKlass.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/methodKlass.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -83,6 +83,7 @@
   m->set_max_stack(0);
   m->set_max_locals(0);
   m->set_intrinsic_id(vmIntrinsics::_none);
+  m->set_jfr_towrite(false);
   m->set_method_data(NULL);
   m->set_interpreter_throwout_count(0);
   m->set_vtable_index(methodOopDesc::garbage_vtable_index);
@@ -102,7 +103,9 @@
   m->invocation_counter()->init();
   m->backedge_counter()->init();
   m->clear_number_of_breakpoints();
+#ifdef GRAAL
   m->set_graal_mirror(NULL);
+#endif
 
 #ifdef TIERED
   m->set_rate(0);
@@ -128,7 +131,9 @@
   // know that Universe::methodKlassObj never moves.
   MarkSweep::mark_and_push(m->adr_constMethod());
   MarkSweep::mark_and_push(m->adr_constants());
+#ifdef GRAAL
   MarkSweep::mark_and_push(m->adr_graal_mirror());
+#endif
   if (m->method_data() != NULL) {
     MarkSweep::mark_and_push(m->adr_method_data());
   }
@@ -143,10 +148,14 @@
   // know that Universe::methodKlassObj never moves.
   PSParallelCompact::mark_and_push(cm, m->adr_constMethod());
   PSParallelCompact::mark_and_push(cm, m->adr_constants());
+#ifdef GRAAL
   PSParallelCompact::mark_and_push(cm, m->adr_graal_mirror());
+#endif
+#ifdef COMPILER2
   if (m->method_data() != NULL) {
     PSParallelCompact::mark_and_push(cm, m->adr_method_data());
   }
+#endif
 }
 #endif // SERIALGC
 
@@ -160,7 +169,9 @@
   // know that Universe::methodKlassObj never moves
   blk->do_oop(m->adr_constMethod());
   blk->do_oop(m->adr_constants());
+#ifdef GRAAL
   blk->do_oop(m->adr_graal_mirror());
+#endif
   if (m->method_data() != NULL) {
     blk->do_oop(m->adr_method_data());
   }
@@ -181,8 +192,10 @@
   if (mr.contains(adr)) blk->do_oop(adr);
   adr = m->adr_constants();
   if (mr.contains(adr)) blk->do_oop(adr);
+#ifdef GRAAL
   adr = m->adr_graal_mirror();
   if (mr.contains(adr)) blk->do_oop(adr);
+#endif
   if (m->method_data() != NULL) {
     adr = m->adr_method_data();
     if (mr.contains(adr)) blk->do_oop(adr);
@@ -201,7 +214,9 @@
   // know that Universe::methodKlassObj never moves.
   MarkSweep::adjust_pointer(m->adr_constMethod());
   MarkSweep::adjust_pointer(m->adr_constants());
+#ifdef GRAAL
   MarkSweep::adjust_pointer(m->adr_graal_mirror());
+#endif
   if (m->method_data() != NULL) {
     MarkSweep::adjust_pointer(m->adr_method_data());
   }
@@ -218,10 +233,14 @@
   methodOop m = methodOop(obj);
   PSParallelCompact::adjust_pointer(m->adr_constMethod());
   PSParallelCompact::adjust_pointer(m->adr_constants());
+#ifdef GRAAL
   PSParallelCompact::adjust_pointer(m->adr_graal_mirror());
+#endif
+#ifdef COMPILER2
   if (m->method_data() != NULL) {
     PSParallelCompact::adjust_pointer(m->adr_method_data());
   }
+#endif // COMPILER2
   return m->object_size();
 }
 #endif // SERIALGC
--- a/src/share/vm/oops/methodOop.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/methodOop.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -207,12 +207,6 @@
 }
 
 address methodOopDesc::bcp_from(int bci) const {
-#ifdef ASSERT
-  if (!((is_native() && bci == 0)  || (!is_native() && 0 <= bci && bci < code_size()))) {
-    char buf[1024];
-    tty->print_cr("bci: %i, size: %i, method: %s", bci, code_size(), const_cast<methodOop>(this)->name_and_sig_as_C_string(buf, 1024));
-  }
-#endif // ASSERT
   assert((is_native() && bci == 0)  || (!is_native() && 0 <= bci && bci < code_size()), "illegal bci");
   address bcp = code_base() + bci;
   assert(is_native() && bcp == code_base() || contains(bcp), "bcp doesn't belong to this method");
@@ -602,6 +596,11 @@
   clear_code();
 }
 
+address methodOopDesc::critical_native_function() {  // returns NativeLookup::lookup_critical_entry's result for this method
+  methodHandle mh(this);  // wrap this method in a handle to pass to the lookup
+  return NativeLookup::lookup_critical_entry(mh);
+}
+
 
 void methodOopDesc::set_signature_handler(address handler) {
   address* signature_handler =  signature_handler_addr();
--- a/src/share/vm/oops/methodOop.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/methodOop.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -77,7 +77,7 @@
 // | method_size             | max_stack                  |
 // | max_locals              | size_of_parameters         |
 // |------------------------------------------------------|
-// | intrinsic_id, (unused)  |  throwout_count            |
+// |intrinsic_id|   flags    |  throwout_count            |
 // |------------------------------------------------------|
 // | num_breakpoints         |  (unused)                  |
 // |------------------------------------------------------|
@@ -124,6 +124,8 @@
   u2                _max_locals;                 // Number of local variables used by this method
   u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
   u1                _intrinsic_id;               // vmSymbols::intrinsic_id (0 == _none)
+  u1                _jfr_towrite : 1,            // Flags
+                                 : 7;
   u2                _interpreter_throwout_count; // Count of times method was exited via exception while interpreting
   u2                _number_of_breakpoints;      // fullspeed debugging support
   InvocationCounter _invocation_counter;         // Incremented before each activation of the method - used to trigger frequency-based optimizations
@@ -227,6 +229,7 @@
   void clear_number_of_breakpoints()             { _number_of_breakpoints = 0; }
 
   // index into instanceKlass methods() array
+  // note: also used by jfr
   u2 method_idnum() const           { return constMethod()->method_idnum(); }
   void set_method_idnum(u2 idnum)   { constMethod()->set_method_idnum(idnum); }
 
@@ -406,6 +409,8 @@
     native_bind_event_is_interesting = true
   };
   address native_function() const                { return *(native_function_addr()); }
+  address critical_native_function();
+
   // Must specify a real function (not NULL).
   // Use clear_native_function() to unregister.
   void set_native_function(address function, bool post_event_flag);
@@ -656,6 +661,9 @@
   void init_intrinsic_id();     // updates from _none if a match
   static vmSymbols::SID klass_id_for_intrinsics(klassOop holder);
 
+  bool jfr_towrite()                 { return _jfr_towrite; }  // reads the 1-bit _jfr_towrite flag
+  void set_jfr_towrite(bool towrite) { _jfr_towrite = towrite; }  // writes the 1-bit _jfr_towrite flag
+
   // On-stack replacement support
   bool has_osr_nmethod(int level, bool match_level) {
    return instanceKlass::cast(method_holder())->lookup_osr_nmethod(this, InvocationEntryBci, level, match_level) != NULL;
--- a/src/share/vm/oops/objArrayKlass.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/oops/objArrayKlass.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -47,7 +47,7 @@
   oop* bottom_klass_addr()            { return (oop*)&_bottom_klass; }
 
   // Compiler/Interpreter offset
-  static int element_klass_offset_in_bytes() { return offset_of(objArrayKlass, _element_klass); }
+  static ByteSize element_klass_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(objArrayKlass, _element_klass)); }
 
   // Dispatched operation
   bool can_be_primary_super_slow() const;
--- a/src/share/vm/opto/block.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/block.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -284,13 +284,13 @@
   // helper function that adds caller save registers to MachProjNode
   void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe);
   // Schedule a call next in the block
-  uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call);
+  uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call);
 
   // Perform basic-block local scheduling
-  Node *select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot);
+  Node *select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot);
   void set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs );
   void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs);
-  bool schedule_local(PhaseCFG *cfg, Matcher &m, int *ready_cnt, VectorSet &next_call);
+  bool schedule_local(PhaseCFG *cfg, Matcher &m, GrowableArray<int> &ready_cnt, VectorSet &next_call);
   // Cleanup if any code lands between a Call and his Catch
   void call_catch_cleanup(Block_Array &bbs);
   // Detect implicit-null-check opportunities.  Basically, find NULL checks
--- a/src/share/vm/opto/c2_globals.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/c2_globals.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -426,6 +426,9 @@
   product(bool, EliminateLocks, true,                                       \
           "Coarsen locks when possible")                                    \
                                                                             \
+  product(bool, EliminateNestedLocks, true,                                 \
+          "Eliminate nested locks of the same object when possible")        \
+                                                                            \
   notproduct(bool, PrintLockStatistics, false,                              \
           "Print precise statistics on the dynamic lock usage")             \
                                                                             \
--- a/src/share/vm/opto/callnode.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/callnode.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -400,10 +400,10 @@
       Node *box = mcall->monitor_box(this, i);
       Node *obj = mcall->monitor_obj(this, i);
       if ( OptoReg::is_valid(regalloc->get_reg_first(box)) ) {
-        while( !box->is_BoxLock() )  box = box->in(1);
+        box = BoxLockNode::box_node(box);
         format_helper( regalloc, st, box, "MON-BOX[", i, &scobjs );
       } else {
-        OptoReg::Name box_reg = BoxLockNode::stack_slot(box);
+        OptoReg::Name box_reg = BoxLockNode::reg(box);
         st->print(" MON-BOX%d=%s+%d",
                    i,
                    OptoReg::regname(OptoReg::c_frame_pointer),
@@ -411,8 +411,7 @@
       }
       const char* obj_msg = "MON-OBJ[";
       if (EliminateLocks) {
-        while( !box->is_BoxLock() )  box = box->in(1);
-        if (box->as_BoxLock()->is_eliminated())
+        if (BoxLockNode::box_node(box)->is_eliminated())
           obj_msg = "MON-OBJ(LOCK ELIMINATED)[";
       }
       format_helper( regalloc, st, obj, obj_msg, i, &scobjs );
@@ -1387,8 +1386,9 @@
     Node *n = ctrl_proj->in(0);
     if (n != NULL && n->is_Unlock()) {
       UnlockNode *unlock = n->as_Unlock();
-      if ((lock->obj_node() == unlock->obj_node()) &&
-          (lock->box_node() == unlock->box_node()) && !unlock->is_eliminated()) {
+      if (lock->obj_node()->eqv_uncast(unlock->obj_node()) &&
+          BoxLockNode::same_slot(lock->box_node(), unlock->box_node()) &&
+          !unlock->is_eliminated()) {
         lock_ops.append(unlock);
         return true;
       }
@@ -1431,8 +1431,8 @@
   }
   if (ctrl->is_Lock()) {
     LockNode *lock = ctrl->as_Lock();
-    if ((lock->obj_node() == unlock->obj_node()) &&
-            (lock->box_node() == unlock->box_node())) {
+    if (lock->obj_node()->eqv_uncast(unlock->obj_node()) &&
+        BoxLockNode::same_slot(lock->box_node(), unlock->box_node())) {
       lock_result = lock;
     }
   }
@@ -1462,8 +1462,9 @@
       }
       if (lock1_node != NULL && lock1_node->is_Lock()) {
         LockNode *lock1 = lock1_node->as_Lock();
-        if ((lock->obj_node() == lock1->obj_node()) &&
-            (lock->box_node() == lock1->box_node()) && !lock1->is_eliminated()) {
+        if (lock->obj_node()->eqv_uncast(lock1->obj_node()) &&
+            BoxLockNode::same_slot(lock->box_node(), lock1->box_node()) &&
+            !lock1->is_eliminated()) {
           lock_ops.append(lock1);
           return true;
         }
@@ -1507,19 +1508,16 @@
 void AbstractLockNode::create_lock_counter(JVMState* state) {
   _counter = OptoRuntime::new_named_counter(state, NamedCounter::LockCounter);
 }
-#endif
 
-void AbstractLockNode::set_eliminated() {
-  _eliminate = true;
-#ifndef PRODUCT
+void AbstractLockNode::set_eliminated_lock_counter() {
   if (_counter) {
     // Update the counter to indicate that this lock was eliminated.
     // The counter update code will stay around even though the
     // optimizer will eliminate the lock operation itself.
     _counter->set_tag(NamedCounter::EliminatedLockCounter);
   }
+}
 #endif
-}
 
 //=============================================================================
 Node *LockNode::Ideal(PhaseGVN *phase, bool can_reshape) {
@@ -1535,7 +1533,7 @@
   // prevents macro expansion from expanding the lock.  Since we don't
   // modify the graph, the value returned from this function is the
   // one computed above.
-  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
+  if (can_reshape && EliminateLocks && !is_non_esc_obj()) {
     //
     // If we are locking an unescaped object, the lock/unlock is unnecessary
     //
@@ -1544,16 +1542,11 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      if (!is_eliminated()) {
-        // Mark it eliminated to update any counters
-        this->set_eliminated();
-      } else {
-        assert(is_coarsened(), "sanity");
-        // The lock could be marked eliminated by lock coarsening
-        // code during first IGVN before EA. Clear coarsened flag
-        // to eliminate all associated locks/unlocks.
-        this->clear_coarsened();
-      }
+      assert(!is_eliminated() || is_coarsened(), "sanity");
+      // The lock could be marked eliminated by lock coarsening
+      // code during first IGVN before EA. Replace coarsened flag
+      // to eliminate all associated locks/unlocks.
+      this->set_non_esc_obj();
       return result;
     }
 
@@ -1613,8 +1606,7 @@
         for (int i = 0; i < lock_ops.length(); i++) {
           AbstractLockNode* lock = lock_ops.at(i);
 
-          // Mark it eliminated to update any counters
-          lock->set_eliminated();
+          // Mark it eliminated by coarsening and update any counters
           lock->set_coarsened();
         }
       } else if (ctrl->is_Region() &&
@@ -1632,6 +1624,40 @@
 }
 
 //=============================================================================
+bool LockNode::is_nested_lock_region() {  // true iff the monitor scan below finds a lower-slot monitor on the same object
+  BoxLockNode* box = box_node()->as_BoxLock();
+  int stk_slot = box->stack_slot();
+  if (stk_slot <= 0)
+    return false; // External lock or it is not Box (Phi node).
+
+  // Ignore complex cases: merged locks or multiple locks.
+  Node* obj = obj_node();
+  LockNode* unique_lock = NULL;
+  if (!box->is_simple_lock_region(&unique_lock, obj) ||
+      (unique_lock != this)) {  // the lock reported back through unique_lock must be this node
+    return false;
+  }
+
+  // Look for an external lock on the same object in every JVM state, depths 1..max_depth.
+  SafePointNode* sfn = this->as_SafePoint();
+  JVMState* youngest_jvms = sfn->jvms();
+  int max_depth = youngest_jvms->depth();
+  for (int depth = 1; depth <= max_depth; depth++) {
+    JVMState* jvms = youngest_jvms->of_depth(depth);
+    int num_mon  = jvms->nof_monitors();
+    // Loop over the monitors recorded in this JVM state
+    for (int idx = 0; idx < num_mon; idx++) {
+      Node* obj_node = sfn->monitor_obj(jvms, idx);  // NOTE: this local shadows the obj_node() accessor used above
+      BoxLockNode* box_node = sfn->monitor_box(jvms, idx)->as_BoxLock();  // NOTE: this local shadows the box_node() accessor used above
+      if ((box_node->stack_slot() < stk_slot) && obj_node->eqv_uncast(obj)) {  // lower slot than ours, same object per eqv_uncast
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+//=============================================================================
 uint UnlockNode::size_of() const { return sizeof(*this); }
 
 //=============================================================================
@@ -1649,7 +1675,7 @@
   // modify the graph, the value returned from this function is the
   // one computed above.
   // Escape state is defined after Parse phase.
-  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
+  if (can_reshape && EliminateLocks && !is_non_esc_obj()) {
     //
     // If we are unlocking an unescaped object, the lock/unlock is unnecessary.
     //
@@ -1658,16 +1684,11 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      if (!is_eliminated()) {
-        // Mark it eliminated to update any counters
-        this->set_eliminated();
-      } else {
-        assert(is_coarsened(), "sanity");
-        // The lock could be marked eliminated by lock coarsening
-        // code during first IGVN before EA. Clear coarsened flag
-        // to eliminate all associated locks/unlocks.
-        this->clear_coarsened();
-      }
+      assert(!is_eliminated() || is_coarsened(), "sanity");
+      // The lock could be marked eliminated by lock coarsening
+      // code during first IGVN before EA. Replace the coarsened kind
+      // with NonEscObj to eliminate all associated locks/unlocks.
+      this->set_non_esc_obj();
     }
   }
   return result;
--- a/src/share/vm/opto/callnode.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/callnode.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -791,6 +791,10 @@
   // are defined in graphKit.cpp, which sets up the bidirectional relation.)
   InitializeNode* initialization();
 
+  // Return the corresponding storestore barrier (or null if none).
+  // Walks out edges to find it...
+  MemBarStoreStoreNode* storestore();
+
   // Convenience for initialization->maybe_set_complete(phase)
   bool maybe_set_complete(PhaseGVN* phase);
 };
@@ -836,8 +840,12 @@
 //------------------------------AbstractLockNode-----------------------------------
 class AbstractLockNode: public CallNode {
 private:
-  bool _eliminate;    // indicates this lock can be safely eliminated
-  bool _coarsened;    // indicates this lock was coarsened
+  enum {          // Lock kind; every value except Regular is reported as eliminated by is_eliminated().
+    Regular = 0,  // Normal lock (initial kind set by the constructor)
+    NonEscObj,    // Lock is used for a non-escaping object
+    Coarsened,    // Lock was coarsened
+    Nested        // Nested lock
+  } _kind;
 #ifndef PRODUCT
   NamedCounter* _counter;
 #endif
@@ -854,12 +862,13 @@
                                GrowableArray<AbstractLockNode*> &lock_ops);
   LockNode *find_matching_lock(UnlockNode* unlock);
 
+  // Update the counter to indicate that this lock was eliminated.
+  void set_eliminated_lock_counter() PRODUCT_RETURN;
 
 public:
   AbstractLockNode(const TypeFunc *tf)
     : CallNode(tf, NULL, TypeRawPtr::BOTTOM),
-      _coarsened(false),
-      _eliminate(false)
+      _kind(Regular)
   {
 #ifndef PRODUCT
     _counter = NULL;
@@ -869,20 +878,23 @@
   Node *   obj_node() const       {return in(TypeFunc::Parms + 0); }
   Node *   box_node() const       {return in(TypeFunc::Parms + 1); }
   Node *   fastlock_node() const  {return in(TypeFunc::Parms + 2); }
+  void     set_box_node(Node* box) { set_req(TypeFunc::Parms + 1, box); }
+
   const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
 
   virtual uint size_of() const { return sizeof(*this); }
 
-  bool is_eliminated()         {return _eliminate; }
-  // mark node as eliminated and update the counter if there is one
-  void set_eliminated();
+  bool is_eliminated()  const { return (_kind != Regular); }
+  bool is_non_esc_obj() const { return (_kind == NonEscObj); }
+  bool is_coarsened()   const { return (_kind == Coarsened); }
+  bool is_nested()      const { return (_kind == Nested); }
 
-  bool is_coarsened()  { return _coarsened; }
-  void set_coarsened() { _coarsened = true; }
-  void clear_coarsened() { _coarsened = false; }
+  void set_non_esc_obj() { _kind = NonEscObj; set_eliminated_lock_counter(); }
+  void set_coarsened()   { _kind = Coarsened; set_eliminated_lock_counter(); }
+  void set_nested()      { _kind = Nested; set_eliminated_lock_counter(); }
 
   // locking does not modify its arguments
-  virtual bool        may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
+  virtual bool may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
 
 #ifndef PRODUCT
   void create_lock_counter(JVMState* s);
@@ -932,6 +944,8 @@
   virtual void  clone_jvms() {
     set_jvms(jvms()->clone_deep(Compile::current()));
   }
+
+  bool is_nested_lock_region(); // Is this Lock nested?
 };
 
 //------------------------------Unlock---------------------------------------
--- a/src/share/vm/opto/cfgnode.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/cfgnode.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1597,7 +1597,7 @@
       bool is_loop = (r->is_Loop() && r->req() == 3);
       // Then, check if there is a data loop when phi references itself directly
       // or through other data nodes.
-      if (is_loop && !phase->eqv_uncast(uin, in(LoopNode::EntryControl)) ||
+      if (is_loop && !uin->eqv_uncast(in(LoopNode::EntryControl)) ||
          !is_loop && is_unsafe_data_reference(uin)) {
         // Break this data loop to avoid creation of a dead loop.
         if (can_reshape) {
--- a/src/share/vm/opto/chaitin.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/chaitin.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1946,18 +1946,29 @@
     reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize);
 
   // Preserve area dump
+  int fixed_slots = C->fixed_slots();
+  OptoReg::Name begin_in_preserve = OptoReg::add(_matcher._old_SP, -(int)C->in_preserve_stack_slots());
+  OptoReg::Name return_addr = _matcher.return_addr();
+
   reg = OptoReg::add(reg, -1);
-  while( OptoReg::is_stack(reg)) {
+  while (OptoReg::is_stack(reg)) {
     tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
-    if( _matcher.return_addr() == reg )
+    if (return_addr == reg) {
       tty->print_cr("return address");
-    else if( _matcher.return_addr() == OptoReg::add(reg,1) &&
-             VerifyStackAtCalls )
-      tty->print_cr("0xBADB100D   +VerifyStackAtCalls");
-    else if ((int)OptoReg::reg2stack(reg) < C->fixed_slots())
+    } else if (reg >= begin_in_preserve) {
+      // Preserved slots are present on x86
+      if (return_addr == OptoReg::add(reg, VMRegImpl::slots_per_word))
+        tty->print_cr("saved fp register");
+      else if (return_addr == OptoReg::add(reg, 2*VMRegImpl::slots_per_word) &&
+               VerifyStackAtCalls)
+        tty->print_cr("0xBADB100D   +VerifyStackAtCalls");
+      else
+        tty->print_cr("in_preserve");
+    } else if ((int)OptoReg::reg2stack(reg) < fixed_slots) {
       tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg));
-    else
-      tty->print_cr("pad2, in_preserve");
+    } else {
+      tty->print_cr("pad2, stack alignment");
+    }
     reg = OptoReg::add(reg, -1);
   }
 
--- a/src/share/vm/opto/chaitin.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/chaitin.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -485,7 +485,11 @@
     return yank_if_dead(old, current_block, &value, &regnd);
   }
 
-  int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
+  int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) { // Entry point: starts the recursion with 'old' as both the current node and the original node.
+    return yank_if_dead_recurse(old, old, current_block, value, regnd);
+  }
+  int yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
+                           Node_List *value, Node_List *regnd);
   int yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
   int elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs );
   int use_prior_register( Node *copy, uint idx, Node *def, Block *current_block, Node_List &value, Node_List &regnd );
--- a/src/share/vm/opto/classes.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/classes.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -166,6 +166,7 @@
 macro(MemBarRelease)
 macro(MemBarReleaseLock)
 macro(MemBarVolatile)
+macro(MemBarStoreStore)
 macro(MergeMem)
 macro(MinI)
 macro(ModD)
--- a/src/share/vm/opto/compile.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/compile.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1282,12 +1282,11 @@
   if( tk ) {
     // If we are referencing a field within a Klass, we need
     // to assume the worst case of an Object.  Both exact and
-    // inexact types must flatten to the same alias class.
-    // Since the flattened result for a klass is defined to be
-    // precisely java.lang.Object, use a constant ptr.
+    // inexact types must flatten to the same alias class so
+    // use NotNull as the PTR.
     if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
 
-      tj = tk = TypeKlassPtr::make(TypePtr::Constant,
+      tj = tk = TypeKlassPtr::make(TypePtr::NotNull,
                                    TypeKlassPtr::OBJECT->klass(),
                                    offset);
     }
@@ -1307,10 +1306,12 @@
     // these 2 disparate memories into the same alias class.  Since the
     // primary supertype array is read-only, there's no chance of confusion
     // where we bypass an array load and an array store.
-    uint off2 = offset - Klass::primary_supers_offset_in_bytes();
-    if( offset == Type::OffsetBot ||
-        off2 < Klass::primary_super_limit()*wordSize ) {
-      offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes();
+    int primary_supers_offset = in_bytes(Klass::primary_supers_offset());
+    if (offset == Type::OffsetBot ||
+        (offset >= primary_supers_offset &&
+         offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) ||
+        offset == (int)in_bytes(Klass::secondary_super_cache_offset())) {
+      offset = in_bytes(Klass::secondary_super_cache_offset());
       tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
     }
   }
@@ -1489,13 +1490,13 @@
         alias_type(idx)->set_rewritable(false);
     }
     if (flat->isa_klassptr()) {
-      if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc))
+      if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))
         alias_type(idx)->set_rewritable(false);
-      if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+      if (flat->offset() == in_bytes(Klass::modifier_flags_offset()))
         alias_type(idx)->set_rewritable(false);
-      if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+      if (flat->offset() == in_bytes(Klass::access_flags_offset()))
         alias_type(idx)->set_rewritable(false);
-      if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc))
+      if (flat->offset() == in_bytes(Klass::java_mirror_offset()))
         alias_type(idx)->set_rewritable(false);
     }
     // %%% (We would like to finalize JavaThread::threadObj_offset(),
@@ -2521,7 +2522,7 @@
             break;
           }
         }
-        assert(p != NULL, "must be found");
+        assert(proj != NULL, "must be found");
         p->subsume_by(proj);
       }
     }
--- a/src/share/vm/opto/escape.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/escape.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1595,6 +1595,7 @@
   GrowableArray<Node*> alloc_worklist;
   GrowableArray<Node*> addp_worklist;
   GrowableArray<Node*> ptr_cmp_worklist;
+  GrowableArray<Node*> storestore_worklist;
   PhaseGVN* igvn = _igvn;
 
   // Push all useful nodes onto CG list and set their type.
@@ -1618,6 +1619,11 @@
                (n->Opcode() == Op_CmpP || n->Opcode() == Op_CmpN)) {
       // Compare pointers nodes
       ptr_cmp_worklist.append(n);
+    } else if (n->is_MemBarStoreStore()) {
+      // Collect all MemBarStoreStore nodes so that depending on the
+      // escape status of the associated Allocate node some of them
+      // may be eliminated.
+      storestore_worklist.append(n);
     }
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node* m = n->fast_out(i);   // Get user
@@ -1681,12 +1687,23 @@
   // Observed 8 passes in jvm2008 compiler.compiler.
   // Set limit to 20 to catch situation when something
   // did go wrong and recompile the method without EA.
+  // Also limit build time to 30 sec (60 in debug VM).
 
 #define CG_BUILD_ITER_LIMIT 20
 
+#ifdef ASSERT
+#define CG_BUILD_TIME_LIMIT 60.0
+#else
+#define CG_BUILD_TIME_LIMIT 30.0
+#endif
+
   uint length = worklist.length();
   int iterations = 0;
-  while(_progress && (iterations++ < CG_BUILD_ITER_LIMIT)) {
+  elapsedTimer time;
+  while(_progress &&
+        (iterations++   < CG_BUILD_ITER_LIMIT) &&
+        (time.seconds() < CG_BUILD_TIME_LIMIT)) {
+    time.start();
     _progress = false;
     for( uint next = 0; next < length; ++next ) {
       int ni = worklist.at(next);
@@ -1695,18 +1712,19 @@
       assert(n != NULL, "should be known node");
       build_connection_graph(n, igvn);
     }
+    time.stop();
   }
-  if (iterations >= CG_BUILD_ITER_LIMIT) {
-    assert(iterations < CG_BUILD_ITER_LIMIT,
-           err_msg("infinite EA connection graph build with %d nodes and worklist size %d",
-           nodes_size(), length));
+  if ((iterations     >= CG_BUILD_ITER_LIMIT) ||
+      (time.seconds() >= CG_BUILD_TIME_LIMIT)) {
+    assert(false, err_msg("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
+           time.seconds(), iterations, nodes_size(), length));
     // Possible infinite build_connection_graph loop,
-    // retry compilation without escape analysis.
-    C->record_failure(C2Compiler::retry_no_escape_analysis());
+    // bailout (no changes to ideal graph were made).
     _collecting = false;
     return false;
   }
 #undef CG_BUILD_ITER_LIMIT
+#undef CG_BUILD_TIME_LIMIT
 
   // 5. Propagate escaped states.
   worklist.clear();
@@ -1724,11 +1742,20 @@
   uint alloc_length = alloc_worklist.length();
   for (uint next = 0; next < alloc_length; ++next) {
     Node* n = alloc_worklist.at(next);
-    if (ptnode_adr(n->_idx)->escape_state() == PointsToNode::NoEscape) {
+    PointsToNode::EscapeState es = ptnode_adr(n->_idx)->escape_state();
+    if (es == PointsToNode::NoEscape) {
       has_non_escaping_obj = true;
       if (n->is_Allocate()) {
         find_init_values(n, &visited, igvn);
+        // The object allocated by this Allocate node will never be
+        // seen by another thread. Mark it so that when it is
+        // expanded no MemBarStoreStore is added.
+        n->as_Allocate()->initialization()->set_does_not_escape();
       }
+    } else if ((es == PointsToNode::ArgEscape) && n->is_Allocate()) {
+      // Same as above. Mark this Allocate node so that when it is
+      // expanded no MemBarStoreStore is added.
+      n->as_Allocate()->initialization()->set_does_not_escape();
     }
   }
 
@@ -1827,20 +1854,15 @@
       Node *n = C->macro_node(i);
       if (n->is_AbstractLock()) { // Lock and Unlock nodes
         AbstractLockNode* alock = n->as_AbstractLock();
-        if (!alock->is_eliminated() || alock->is_coarsened()) {
+        if (!alock->is_non_esc_obj()) {
           PointsToNode::EscapeState es = escape_state(alock->obj_node());
           assert(es != PointsToNode::UnknownEscape, "should know");
           if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-            if (!alock->is_eliminated()) {
-              // Mark it eliminated to update any counters
-              alock->set_eliminated();
-            } else {
-              // The lock could be marked eliminated by lock coarsening
-              // code during first IGVN before EA. Clear coarsened flag
-              // to eliminate all associated locks/unlocks and relock
-              // during deoptimization.
-              alock->clear_coarsened();
-            }
+            assert(!alock->is_eliminated() || alock->is_coarsened(), "sanity");
+            // The lock could be marked eliminated by lock coarsening
+            // code during first IGVN before EA. Replace the coarsened kind
+            // with NonEscObj to eliminate all associated locks/unlocks.
+            alock->set_non_esc_obj();
           }
         }
       }
@@ -1874,6 +1896,25 @@
       igvn->hash_delete(_pcmp_eq);
   }
 
+  // For MemBarStoreStore nodes added in library_call.cpp, check
+  // escape status of associated AllocateNode and optimize out
+  // MemBarStoreStore node if the allocated object never escapes.
+  while (storestore_worklist.length() != 0) {
+    Node *n = storestore_worklist.pop();
+    MemBarStoreStoreNode *storestore = n ->as_MemBarStoreStore();
+    Node *alloc = storestore->in(MemBarNode::Precedent)->in(0);
+    assert (alloc->is_Allocate(), "storestore should point to AllocateNode");
+    PointsToNode::EscapeState es = ptnode_adr(alloc->_idx)->escape_state();
+    if (es == PointsToNode::NoEscape || es == PointsToNode::ArgEscape) {
+      MemBarNode* mb = MemBarNode::make(C, Op_MemBarCPUOrder, Compile::AliasIdxBot);
+      mb->init_req(TypeFunc::Memory, storestore->in(TypeFunc::Memory));
+      mb->init_req(TypeFunc::Control, storestore->in(TypeFunc::Control));
+
+      _igvn->register_new_node_with_optimizer(mb);
+      _igvn->replace_node(storestore, mb);
+    }
+  }
+
 #ifndef PRODUCT
   if (PrintEscapeAnalysis) {
     dump(); // Dump ConnectionGraph
@@ -2263,9 +2304,35 @@
         PointsToNode::EscapeState arg_esc = ptnode_adr(arg->_idx)->escape_state();
         if (!arg->is_top() && at->isa_ptr() && aat->isa_ptr() &&
             (is_arraycopy || arg_esc < PointsToNode::ArgEscape)) {
-
+#ifdef ASSERT
           assert(aat == Type::TOP || aat == TypePtr::NULL_PTR ||
                  aat->isa_ptr() != NULL, "expecting an Ptr");
+          if (!(is_arraycopy ||
+                call->as_CallLeaf()->_name != NULL &&
+                (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
+                 strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
+          ) {
+            call->dump();
+            assert(false, "EA: unexpected CallLeaf");
+          }
+#endif
+          if (arg_esc < PointsToNode::ArgEscape) {
+            set_escape_state(arg->_idx, PointsToNode::ArgEscape);
+            Node* arg_base = arg;
+            if (arg->is_AddP()) {
+              //
+              // The inline_native_clone() case when the arraycopy stub is called
+              // after the allocation before Initialize and CheckCastPP nodes.
+              // Or normal arraycopy for object arrays case.
+              //
+              // Set AddP's base (Allocate) as not scalar replaceable since
+              // pointer to the base (with offset) is passed as argument.
+              //
+              arg_base = get_addp_base(arg);
+              set_escape_state(arg_base->_idx, PointsToNode::ArgEscape);
+            }
+          }
+
           bool arg_has_oops = aat->isa_oopptr() &&
                               (aat->isa_oopptr()->klass() == NULL || aat->isa_instptr() ||
                                (aat->isa_aryptr() && aat->isa_aryptr()->klass()->is_obj_array_klass()));
@@ -2278,85 +2345,33 @@
           //   arraycopy(char[],0,Object*,0,size);
           //   arraycopy(Object*,0,char[],0,size);
           //
-          // Don't add edges from dst's fields in such cases.
+          // Do nothing special in such cases.
           //
-          bool arg_is_arraycopy_dest = src_has_oops && is_arraycopy &&
-                                       arg_has_oops && (i > TypeFunc::Parms);
-#ifdef ASSERT
-          if (!(is_arraycopy ||
-                call->as_CallLeaf()->_name != NULL &&
-                (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
-                 strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
-          ) {
-            call->dump();
-            assert(false, "EA: unexpected CallLeaf");
-          }
-#endif
-          // Always process arraycopy's destination object since
-          // we need to add all possible edges to references in
-          // source object.
-          if (arg_esc >= PointsToNode::ArgEscape &&
-              !arg_is_arraycopy_dest) {
-            continue;
-          }
-          set_escape_state(arg->_idx, PointsToNode::ArgEscape);
-          Node* arg_base = arg;
-          if (arg->is_AddP()) {
-            //
-            // The inline_native_clone() case when the arraycopy stub is called
-            // after the allocation before Initialize and CheckCastPP nodes.
-            // Or normal arraycopy for object arrays case.
-            //
-            // Set AddP's base (Allocate) as not scalar replaceable since
-            // pointer to the base (with offset) is passed as argument.
-            //
-            arg_base = get_addp_base(arg);
-          }
-          VectorSet argset = *PointsTo(arg_base); // Clone set
-          for( VectorSetI j(&argset); j.test(); ++j ) {
-            uint pd = j.elem; // Destination object
-            set_escape_state(pd, PointsToNode::ArgEscape);
-
-            if (arg_is_arraycopy_dest) {
-              PointsToNode* ptd = ptnode_adr(pd);
-              // Conservatively reference an unknown object since
-              // not all source's fields/elements may be known.
-              add_edge_from_fields(pd, _phantom_object, Type::OffsetBot);
-
-              Node *src = call->in(TypeFunc::Parms)->uncast();
-              Node* src_base = src;
-              if (src->is_AddP()) {
-                src_base  = get_addp_base(src);
-              }
-              // Create edges from destination's fields to
-              // everything known source's fields could point to.
-              for( VectorSetI s(PointsTo(src_base)); s.test(); ++s ) {
-                uint ps = s.elem;
-                bool has_bottom_offset = false;
-                for (uint fd = 0; fd < ptd->edge_count(); fd++) {
-                  assert(ptd->edge_type(fd) == PointsToNode::FieldEdge, "expecting a field edge");
-                  int fdi = ptd->edge_target(fd);
-                  PointsToNode* pfd = ptnode_adr(fdi);
-                  int offset = pfd->offset();
-                  if (offset == Type::OffsetBot)
-                    has_bottom_offset = true;
-                  assert(offset != -1, "offset should be set");
-                  add_deferred_edge_to_fields(fdi, ps, offset);
-                }
-                // Destination object may not have access (no field edge)
-                // to fields which are accessed in source object.
-                // As result no edges will be created to those source's
-                // fields and escape state of destination object will
-                // not be propagated to those fields.
-                //
-                // Mark source object as global escape except in
-                // the case with Type::OffsetBot field (which is
-                // common case for array elements access) when
-                // edges are created to all source's fields.
-                if (!has_bottom_offset) {
-                  set_escape_state(ps, PointsToNode::GlobalEscape);
-                }
-              }
+          if (is_arraycopy && (i > TypeFunc::Parms) &&
+              src_has_oops && arg_has_oops) {
+            // Destination object's fields reference an unknown object.
+            Node* arg_base = arg;
+            if (arg->is_AddP()) {
+              arg_base = get_addp_base(arg);
+            }
+            for (VectorSetI s(PointsTo(arg_base)); s.test(); ++s) {
+              uint ps = s.elem;
+              set_escape_state(ps, PointsToNode::ArgEscape);
+              add_edge_from_fields(ps, _phantom_object, Type::OffsetBot);
+            }
+            // Conservatively all values in source object fields globally escape
+            // since we don't know if values in destination object fields
+            // escape (it could be traced but it is too expensive).
+            Node* src = call->in(TypeFunc::Parms)->uncast();
+            Node* src_base = src;
+            if (src->is_AddP()) {
+              src_base  = get_addp_base(src);
+            }
+            for (VectorSetI s(PointsTo(src_base)); s.test(); ++s) {
+              uint ps = s.elem;
+              set_escape_state(ps, PointsToNode::ArgEscape);
+              // Use OffsetTop to indicate that the fields globally escape.
+              add_edge_from_fields(ps, _phantom_object, Type::OffsetTop);
             }
           }
         }
--- a/src/share/vm/opto/gcm.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/gcm.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1344,8 +1344,8 @@
 
   // Schedule locally.  Right now a simple topological sort.
   // Later, do a real latency aware scheduler.
-  int *ready_cnt = NEW_RESOURCE_ARRAY(int,C->unique());
-  memset( ready_cnt, -1, C->unique() * sizeof(int) );
+  uint max_idx = C->unique();
+  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
   visited.Clear();
   for (i = 0; i < _num_blocks; i++) {
     if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
--- a/src/share/vm/opto/graphKit.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/graphKit.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1522,6 +1522,11 @@
                           const TypeOopPtr* val_type,
                           BasicType bt,
                           bool use_precise) {
+  // Transformation of a value which could be NULL pointer (CastPP #NULL)
+  // could be delayed during Parse (for example, in adjust_map_after_if()).
+  // Execute transformation here to avoid barrier generation in such case.
+  if (_gvn.type(val) == TypePtr::NULL_PTR)
+    val = _gvn.makecon(TypePtr::NULL_PTR);
 
   set_control(ctl);
   if (stopped()) return top(); // Dead path ?
@@ -2304,9 +2309,9 @@
   // will always succeed.  We could leave a dependency behind to ensure this.
 
   // First load the super-klass's check-offset
-  Node *p1 = basic_plus_adr( superklass, superklass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() );
+  Node *p1 = basic_plus_adr( superklass, superklass, in_bytes(Klass::super_check_offset_offset()) );
   Node *chk_off = _gvn.transform( new (C, 3) LoadINode( NULL, memory(p1), p1, _gvn.type(p1)->is_ptr() ) );
-  int cacheoff_con = sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes();
+  int cacheoff_con = in_bytes(Klass::secondary_super_cache_offset());
   bool might_be_cache = (find_int_con(chk_off, cacheoff_con) == cacheoff_con);
 
   // Load from the sub-klass's super-class display list, or a 1-word cache of
@@ -2934,7 +2939,7 @@
     }
   }
   constant_value = Klass::_lh_neutral_value;  // put in a known value
-  Node* lhp = basic_plus_adr(klass_node, klass_node, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));
+  Node* lhp = basic_plus_adr(klass_node, klass_node, in_bytes(Klass::layout_helper_offset()));
   return make_load(NULL, lhp, TypeInt::INT, T_INT);
 }
 
@@ -3337,6 +3342,19 @@
   return NULL;
 }
 
+// Trace Allocate -> Proj[Parm] -> MemBarStoreStore; returns the first such user found, or NULL if there is none.
+MemBarStoreStoreNode* AllocateNode::storestore() {
+  ProjNode* rawoop = proj_out(AllocateNode::RawAddress);
+  if (rawoop == NULL)  return NULL; // no raw-address projection, so nothing to trace
+  for (DUIterator_Fast imax, i = rawoop->fast_outs(imax); i < imax; i++) { // scan the projection's users
+    Node* storestore = rawoop->fast_out(i);
+    if (storestore->is_MemBarStoreStore()) {
+      return storestore->as_MemBarStoreStore();
+    }
+  }
+  return NULL; // no MemBarStoreStore among the users
+}
+
 //----------------------------- loop predicates ---------------------------
 
 //------------------------------add_predicate_impl----------------------------
--- a/src/share/vm/opto/lcm.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/lcm.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -404,7 +404,7 @@
 // remaining cases (most), choose the instruction with the greatest latency
 // (that is, the most number of pseudo-cycles required to the end of the
 // routine). If there is a tie, choose the instruction with the most inputs.
-Node *Block::select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot) {
+Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {
 
   // If only a single entry on the stack, use it
   uint cnt = worklist.size();
@@ -465,7 +465,7 @@
 
         // More than this instruction pending for successor to be ready,
         // don't choose this if other opportunities are ready
-        if (ready_cnt[use->_idx] > 1)
+        if (ready_cnt.at(use->_idx) > 1)
           n_choice = 1;
       }
 
@@ -565,7 +565,7 @@
 
 
 //------------------------------sched_call-------------------------------------
-uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
+uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
   RegMask regs;
 
   // Schedule all the users of the call right now.  All the users are
@@ -574,8 +574,9 @@
   for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
     Node* n = mcall->fast_out(i);
     assert( n->is_MachProj(), "" );
-    --ready_cnt[n->_idx];
-    assert( !ready_cnt[n->_idx], "" );
+    int n_cnt = ready_cnt.at(n->_idx)-1;
+    ready_cnt.at_put(n->_idx, n_cnt);
+    assert( n_cnt == 0, "" );
     // Schedule next to call
     _nodes.map(node_cnt++, n);
     // Collect defined registers
@@ -590,7 +591,9 @@
       Node* m = n->fast_out(j); // Get user
       if( bbs[m->_idx] != this ) continue;
       if( m->is_Phi() ) continue;
-      if( !--ready_cnt[m->_idx] )
+      int m_cnt = ready_cnt.at(m->_idx)-1;
+      ready_cnt.at_put(m->_idx, m_cnt);
+      if( m_cnt == 0 )
         worklist.push(m);
     }
 
@@ -655,7 +658,7 @@
 
 //------------------------------schedule_local---------------------------------
 // Topological sort within a block.  Someday become a real scheduler.
-bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, VectorSet &next_call) {
+bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &ready_cnt, VectorSet &next_call) {
   // Already "sorted" are the block start Node (as the first entry), and
   // the block-ending Node and any trailing control projections.  We leave
   // these alone.  PhiNodes and ParmNodes are made to follow the block start
@@ -695,7 +698,7 @@
         if( m && cfg->_bbs[m->_idx] == this && !m->is_top() )
           local++;              // One more block-local input
       }
-      ready_cnt[n->_idx] = local; // Count em up
+      ready_cnt.at_put(n->_idx, local); // Count em up
 
 #ifdef ASSERT
       if( UseConcMarkSweepGC || UseG1GC ) {
@@ -729,7 +732,7 @@
     }
   }
   for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
-    ready_cnt[_nodes[i2]->_idx] = 0;
+    ready_cnt.at_put(_nodes[i2]->_idx, 0);
 
   // All the prescheduled guys do not hold back internal nodes
   uint i3;
@@ -737,8 +740,10 @@
     Node *n = _nodes[i3];       // Get pre-scheduled
     for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
       Node* m = n->fast_out(j);
-      if( cfg->_bbs[m->_idx] ==this ) // Local-block user
-        ready_cnt[m->_idx]--;   // Fix ready count
+      if( cfg->_bbs[m->_idx] ==this ) { // Local-block user
+        int m_cnt = ready_cnt.at(m->_idx)-1;
+        ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
+      }
     }
   }
 
@@ -747,7 +752,7 @@
   Node_List worklist;
   for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
     Node *m = _nodes[i4];
-    if( !ready_cnt[m->_idx] ) {   // Zero ready count?
+    if( !ready_cnt.at(m->_idx) ) {   // Zero ready count?
       if (m->is_iteratively_computed()) {
         // Push induction variable increments last to allow other uses
         // of the phi to be scheduled first. The select() method breaks
@@ -775,14 +780,14 @@
       for (uint j=0; j<_nodes.size(); j++) {
         Node     *n = _nodes[j];
         int     idx = n->_idx;
-        tty->print("#   ready cnt:%3d  ", ready_cnt[idx]);
+        tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
         tty->print("latency:%3d  ", cfg->_node_latency->at_grow(idx));
         tty->print("%4d: %s\n", idx, n->Name());
       }
     }
 #endif
 
-  uint max_idx = matcher.C->unique();
+  uint max_idx = (uint)ready_cnt.length();
   // Pull from worklist and schedule
   while( worklist.size() ) {    // Worklist is not ready
 
@@ -840,11 +845,13 @@
       Node* m = n->fast_out(i5); // Get user
       if( cfg->_bbs[m->_idx] != this ) continue;
       if( m->is_Phi() ) continue;
-      if (m->_idx > max_idx) { // new node, skip it
+      if (m->_idx >= max_idx) { // new node, skip it
         assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
         continue;
       }
-      if( !--ready_cnt[m->_idx] )
+      int m_cnt = ready_cnt.at(m->_idx)-1;
+      ready_cnt.at_put(m->_idx, m_cnt);
+      if( m_cnt == 0 )
         worklist.push(m);
     }
   }
--- a/src/share/vm/opto/library_call.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/library_call.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -819,7 +819,7 @@
   if (stopped())
     return NULL;                // already stopped
   bool zero_offset = _gvn.type(offset) == TypeInt::ZERO;
-  if (zero_offset && _gvn.eqv_uncast(subseq_length, array_length))
+  if (zero_offset && subseq_length->eqv_uncast(array_length))
     return NULL;                // common case of whole-array copy
   Node* last = subseq_length;
   if (!zero_offset)             // last += offset
@@ -2153,7 +2153,7 @@
   //
   // if (offset == java_lang_ref_Reference::_reference_offset) {
   //   if (base != null) {
-  //     if (klass(base)->reference_type() != REF_NONE)) {
+  //     if (instance_of(base, java.lang.ref.Reference)) {
   //       pre_barrier(_, pre_val, ...);
   //     }
   //   }
@@ -2165,9 +2165,6 @@
   IdealKit ideal(this);
 #define __ ideal.
 
-  const int reference_type_offset = instanceKlass::reference_type_offset_in_bytes() +
-                                        sizeof(oopDesc);
-
   Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
 
   __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
@@ -2679,7 +2676,13 @@
     cas = _gvn.transform(new (C, 5) CompareAndSwapLNode(control(), mem, adr, newval, oldval));
     break;
   case T_OBJECT:
-     // reference stores need a store barrier.
+    // Transformation of a value which could be NULL pointer (CastPP #NULL)
+    // could be delayed during Parse (for example, in adjust_map_after_if()).
+    // Execute transformation here to avoid barrier generation in such case.
+    if (_gvn.type(newval) == TypePtr::NULL_PTR)
+      newval = _gvn.makecon(TypePtr::NULL_PTR);
+
+    // Reference stores need a store barrier.
     // (They don't if CAS fails, but it isn't worth checking.)
     pre_barrier(true /* do_load*/,
                 control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
@@ -2806,8 +2809,10 @@
   // Note:  The argument might still be an illegal value like
   // Serializable.class or Object[].class.   The runtime will handle it.
   // But we must make an explicit check for initialization.
-  Node* insp = basic_plus_adr(kls, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
-  Node* inst = make_load(NULL, insp, TypeInt::INT, T_INT);
+  Node* insp = basic_plus_adr(kls, in_bytes(instanceKlass::init_state_offset()));
+  // Use T_BOOLEAN for instanceKlass::_init_state so the compiler
+  // can generate code to load it as unsigned byte.
+  Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
   Node* bits = intcon(instanceKlass::fully_initialized);
   Node* test = _gvn.transform( new (C, 3) SubINode(inst, bits) );
   // The 'test' is non-zero if we need to take a slow path.
@@ -2954,7 +2959,7 @@
 //---------------------------load_mirror_from_klass----------------------------
 // Given a klass oop, load its java mirror (a java.lang.Class oop).
 Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
-  Node* p = basic_plus_adr(klass, Klass::java_mirror_offset_in_bytes() + sizeof(oopDesc));
+  Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset()));
   return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT);
 }
 
@@ -2994,7 +2999,7 @@
 Node* LibraryCallKit::generate_access_flags_guard(Node* kls, int modifier_mask, int modifier_bits, RegionNode* region) {
   // Branch around if the given klass has the given modifier bit set.
   // Like generate_guard, adds a new path onto the region.
-  Node* modp = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+  Node* modp = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
   Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
   Node* mask = intcon(modifier_mask);
   Node* bits = intcon(modifier_bits);
@@ -3115,7 +3120,7 @@
     break;
 
   case vmIntrinsics::_getModifiers:
-    p = basic_plus_adr(kls, Klass::modifier_flags_offset_in_bytes() + sizeof(oopDesc));
+    p = basic_plus_adr(kls, in_bytes(Klass::modifier_flags_offset()));
     query_value = make_load(NULL, p, TypeInt::INT, T_INT);
     break;
 
@@ -3155,7 +3160,7 @@
       // A guard was added.  If the guard is taken, it was an array.
       phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
     // If we fall through, it's a plain class.  Get its _super.
-    p = basic_plus_adr(kls, Klass::super_offset_in_bytes() + sizeof(oopDesc));
+    p = basic_plus_adr(kls, in_bytes(Klass::super_offset()));
     kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL) );
     null_ctl = top();
     kls = null_check_oop(kls, &null_ctl);
@@ -3173,7 +3178,7 @@
     if (generate_array_guard(kls, region) != NULL) {
       // Be sure to pin the oop load to the guard edge just created:
       Node* is_array_ctrl = region->in(region->req()-1);
-      Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()) + sizeof(oopDesc));
+      Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()));
       Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT);
       phi->add_req(cmo);
     }
@@ -3181,7 +3186,7 @@
     break;
 
   case vmIntrinsics::_getClassAccessFlags:
-    p = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+    p = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
     query_value = make_load(NULL, p, TypeInt::INT, T_INT);
     break;
 
@@ -4194,12 +4199,17 @@
   Node* raw_obj = alloc_obj->in(1);
   assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
 
+  AllocateNode* alloc = NULL;
   if (ReduceBulkZeroing) {
     // We will be completely responsible for initializing this object -
     // mark Initialize node as complete.
-    AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
+    alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
     // The object was just allocated - there should be no any stores!
     guarantee(alloc != NULL && alloc->maybe_set_complete(&_gvn), "");
+    // Mark as complete_with_arraycopy so that on AllocateNode
+    // expansion, we know this AllocateNode is initialized by an array
+    // copy and a StoreStore barrier exists after the array copy.
+    alloc->initialization()->set_complete_with_arraycopy();
   }
 
   // Copy the fastest available way.
@@ -4261,7 +4271,18 @@
   }
 
   // Do not let reads from the cloned object float above the arraycopy.
-  insert_mem_bar(Op_MemBarCPUOrder);
+  if (alloc != NULL) {
+    // Do not let stores that initialize this object be reordered with
+    // a subsequent store that would make this object accessible by
+    // other threads.
+    // Record what AllocateNode this StoreStore protects so that
+    // escape analysis can go from the MemBarStoreStoreNode to the
+    // AllocateNode and eliminate the MemBarStoreStoreNode if possible
+    // based on the escape status of the AllocateNode.
+    insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress));
+  } else {
+    insert_mem_bar(Op_MemBarCPUOrder);
+  }
 }
 
 //------------------------inline_native_clone----------------------------
@@ -4650,7 +4671,7 @@
   if (ReduceBulkZeroing
       && !ZeroTLAB              // pointless if already zeroed
       && basic_elem_type != T_CONFLICT // avoid corner case
-      && !_gvn.eqv_uncast(src, dest)
+      && !src->eqv_uncast(dest)
       && ((alloc = tightly_coupled_allocation(dest, slow_region))
           != NULL)
       && _gvn.find_int_con(alloc->in(AllocateNode::ALength), 1) > 0
@@ -4728,7 +4749,7 @@
     // copy_length is 0.
     if (!stopped() && dest_uninitialized) {
       Node* dest_length = alloc->in(AllocateNode::ALength);
-      if (_gvn.eqv_uncast(copy_length, dest_length)
+      if (copy_length->eqv_uncast(dest_length)
           || _gvn.find_int_con(dest_length, 1) <= 0) {
         // There is no zeroing to do. No need for a secondary raw memory barrier.
       } else {
@@ -4774,7 +4795,7 @@
     // with its attendant messy index arithmetic, and upgrade
     // the copy to a more hardware-friendly word size of 64 bits.
     Node* tail_ctl = NULL;
-    if (!stopped() && !_gvn.eqv_uncast(dest_tail, dest_length)) {
+    if (!stopped() && !dest_tail->eqv_uncast(dest_length)) {
       Node* cmp_lt   = _gvn.transform( new(C,3) CmpINode(dest_tail, dest_length) );
       Node* bol_lt   = _gvn.transform( new(C,2) BoolNode(cmp_lt, BoolTest::lt) );
       tail_ctl = generate_slow_guard(bol_lt, NULL);
@@ -4857,7 +4878,7 @@
       PreserveJVMState pjvms(this);
       set_control(not_subtype_ctrl);
       // (At this point we can assume disjoint_bases, since types differ.)
-      int ek_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+      int ek_offset = in_bytes(objArrayKlass::element_klass_offset());
       Node* p1 = basic_plus_adr(dest_klass, ek_offset);
       Node* n1 = LoadKlassNode::make(_gvn, immutable_memory(), p1, TypeRawPtr::BOTTOM);
       Node* dest_elem_klass = _gvn.transform(n1);
@@ -5004,7 +5025,16 @@
   // the membar also.
   //
   // Do not let reads from the cloned object float above the arraycopy.
-  if (InsertMemBarAfterArraycopy || alloc != NULL)
+  if (alloc != NULL) {
+    // Do not let stores that initialize this object be reordered with
+    // a subsequent store that would make this object accessible by
+    // other threads.
+    // Record what AllocateNode this StoreStore protects so that
+    // escape analysis can go from the MemBarStoreStoreNode to the
+    // AllocateNode and eliminate the MemBarStoreStoreNode if possible
+    // based on the escape status of the AllocateNode.
+    insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress));
+  } else if (InsertMemBarAfterArraycopy)
     insert_mem_bar(Op_MemBarCPUOrder);
 }
 
@@ -5308,7 +5338,7 @@
   // for the target array.  This is an optimistic check.  It will
   // look in each non-null element's class, at the desired klass's
   // super_check_offset, for the desired klass.
-  int sco_offset = Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc);
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
   Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset);
   Node* n3 = new(C, 3) LoadINode(NULL, memory(p3), p3, _gvn.type(p3)->is_ptr());
   Node* check_offset = ConvI2X(_gvn.transform(n3));
--- a/src/share/vm/opto/locknode.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/locknode.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -49,18 +49,22 @@
 
 //-----------------------------hash--------------------------------------------
 uint BoxLockNode::hash() const {
+  if (EliminateNestedLocks)
+    return NO_HASH; // Each locked region has own BoxLock node
   return Node::hash() + _slot + (_is_eliminated ? Compile::current()->fixed_slots() : 0);
 }
 
 //------------------------------cmp--------------------------------------------
 uint BoxLockNode::cmp( const Node &n ) const {
+  if (EliminateNestedLocks)
+    return (&n == this); // Always fail except on self
   const BoxLockNode &bn = (const BoxLockNode &)n;
   return bn._slot == _slot && bn._is_eliminated == _is_eliminated;
 }
 
-OptoReg::Name BoxLockNode::stack_slot(Node* box_node) {
-  // Chase down the BoxNode
-  while (!box_node->is_BoxLock()) {
+BoxLockNode* BoxLockNode::box_node(Node* box) {
+  // Chase down the BoxNode after RA which may spill box nodes.
+  while (!box->is_BoxLock()) {
     //    if (box_node->is_SpillCopy()) {
     //      Node *m = box_node->in(1);
     //      if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_StoreP) {
@@ -68,10 +72,64 @@
     //        continue;
     //      }
     //    }
-    assert(box_node->is_SpillCopy() || box_node->is_Phi(), "Bad spill of Lock.");
-    box_node = box_node->in(1);
+    assert(box->is_SpillCopy() || box->is_Phi(), "Bad spill of Lock.");
+    // Only BoxLock nodes with the same stack slot are merged.
+    // So it is enough to trace one path to find the slot value.
+    box = box->in(1);
   }
-  return box_node->in_RegMask(0).find_first_elem();
+  return box->as_BoxLock();
+}
+
+OptoReg::Name BoxLockNode::reg(Node* box) {
+  return box_node(box)->in_RegMask(0).find_first_elem(); // First element of the input-0 RegMask of the BoxLock found from 'box'
+}
+
+// Is BoxLock node used for one simple lock region (same box and obj)? If unique_lock != NULL, *unique_lock is set only when exactly one distinct Lock node was found.
+bool BoxLockNode::is_simple_lock_region(LockNode** unique_lock, Node* obj) {
+  LockNode* lock = NULL;
+  bool has_one_lock = false; // True while exactly one distinct Lock node has been seen
+  for (uint i = 0; i < this->outcnt(); i++) {
+    Node* n = this->raw_out(i);
+    assert(!n->is_Phi(), "should not merge BoxLock nodes");
+    if (n->is_AbstractLock()) {
+      AbstractLockNode* alock = n->as_AbstractLock();
+      // Check lock's box since box could be referenced by Lock's debug info.
+      if (alock->box_node() == this) {
+        if (alock->obj_node()->eqv_uncast(obj)) {
+          if ((unique_lock != NULL) && alock->is_Lock()) {
+            if (lock == NULL) {
+              lock = alock->as_Lock(); // Remember the first Lock node found
+              has_one_lock = true;
+            } else if (lock != alock->as_Lock()) {
+              has_one_lock = false; // A second, different Lock node uses this box
+            }
+          }
+        } else {
+          return false; // Different objects
+        }
+      }
+    }
+  }
+#ifdef ASSERT
+  // Verify that FastLock nodes reference only this lock region.
+  for (uint i = 0; i < this->outcnt(); i++) {
+    Node* n = this->raw_out(i);
+    if (n->is_FastLock()) {
+      FastLockNode* flock = n->as_FastLock();
+      assert((flock->box_node() == this) && flock->obj_node()->eqv_uncast(obj),"");
+    }
+    // Don't check monitor info in safepoints since the referenced object could
+    // be different from the locked object. It could be a Phi node of different
+    // cast nodes which point to this locked object.
+    // We assume that no other objects could be referenced in monitor info
+    // associated with this BoxLock node because all associated locks and
+    // unlocks reference only this one object.
+  }
+#endif
+  if (unique_lock != NULL && has_one_lock) {
+    *unique_lock = lock; // Left untouched when no Lock, or more than one distinct Lock, was found
+  }
+  return true;
 }
 
 //=============================================================================
--- a/src/share/vm/opto/locknode.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/locknode.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -49,11 +49,11 @@
 
 //------------------------------BoxLockNode------------------------------------
 class BoxLockNode : public Node {
+  const int     _slot; // stack slot
+  RegMask     _inmask; // OptoReg corresponding to stack slot
+  bool _is_eliminated; // Associated locks were safely eliminated
+
 public:
-  const int _slot;
-  RegMask   _inmask;
-  bool _is_eliminated;    // indicates this lock was safely eliminated
-
   BoxLockNode( int lock );
   virtual int Opcode() const;
   virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
@@ -66,11 +66,19 @@
   virtual const class Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
   virtual uint ideal_reg() const { return Op_RegP; }
 
-  static OptoReg::Name stack_slot(Node* box_node);
+  static OptoReg::Name reg(Node* box);      // First element of the input-0 RegMask of the BoxLock found from 'box'
+  static BoxLockNode* box_node(Node* box);  // Follows in(1) from 'box' until a BoxLock node is reached
+  static bool same_slot(Node* box1, Node* box2) {
+    return box1->as_BoxLock()->_slot == box2->as_BoxLock()->_slot;
+  }
+  int stack_slot() const { return _slot; }
 
-  bool is_eliminated()  { return _is_eliminated; }
+  bool is_eliminated() const { return _is_eliminated; }
   // mark lock as eliminated.
-  void set_eliminated() { _is_eliminated = true; }
+  void set_eliminated()      { _is_eliminated = true; }
+
+  // Is BoxLock node used for one simple lock region?
+  bool is_simple_lock_region(LockNode** unique_lock, Node* obj);
 
 #ifndef PRODUCT
   virtual void format( PhaseRegAlloc *, outputStream *st ) const;
@@ -91,6 +99,7 @@
   }
   Node* obj_node() const { return in(1); }
   Node* box_node() const { return in(2); }
+  void  set_box_node(Node* box) { set_req(2, box); }
 
   // FastLock and FastUnlockNode do not hash, we need one for each correspoding
   // LockNode/UnLockNode to avoid creating Phi's.
--- a/src/share/vm/opto/loopnode.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/loopnode.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -898,7 +898,7 @@
 Node* CountedLoopNode::match_incr_with_optional_truncation(
                       Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type) {
   // Quick cutouts:
-  if (expr == NULL || expr->req() != 3)  return false;
+  if (expr == NULL || expr->req() != 3)  return NULL;
 
   Node *t1 = NULL;
   Node *t2 = NULL;
@@ -3278,16 +3278,7 @@
 #ifdef ASSERT
     if (legal->is_Start() && !early->is_Root()) {
       // Bad graph. Print idom path and fail.
-      tty->print_cr( "Bad graph detected in build_loop_late");
-      tty->print("n: ");n->dump(); tty->cr();
-      tty->print("early: ");early->dump(); tty->cr();
-      int ct = 0;
-      Node *dbg_legal = LCA;
-      while(!dbg_legal->is_Start() && ct < 100) {
-        tty->print("idom[%d] ",ct); dbg_legal->dump(); tty->cr();
-        ct++;
-        dbg_legal = idom(dbg_legal);
-      }
+      dump_bad_graph(n, early, LCA);
       assert(false, "Bad graph detected in build_loop_late");
     }
 #endif
@@ -3337,6 +3328,88 @@
     chosen_loop->_body.push(n);// Collect inner loops
 }
 
+#ifdef ASSERT
+void PhaseIdealLoop::dump_bad_graph(Node* n, Node* early, Node* LCA) { // Debug-only: print n, its inputs/users (two levels) and the idom chain from LCA
+  tty->print_cr( "Bad graph detected in build_loop_late");
+  tty->print("n: "); n->dump();
+  tty->print("early(n): "); early->dump();
+  if (n->in(0) != NULL  && !n->in(0)->is_top() &&
+      n->in(0) != early && !n->in(0)->is_Root()) { // Skip in(0) when it is NULL, top, Root or already printed as early
+    tty->print("n->in(0): "); n->in(0)->dump();
+  }
+  for (uint i = 1; i < n->req(); i++) { // Inputs of n and, one level further, their inputs
+    Node* in1 = n->in(i);
+    if (in1 != NULL && in1 != n && !in1->is_top()) {
+      tty->print("n->in(%d): ", i); in1->dump();
+      Node* in1_early = get_ctrl(in1);
+      tty->print("early(n->in(%d)): ", i); in1_early->dump();
+      if (in1->in(0) != NULL     && !in1->in(0)->is_top() &&
+          in1->in(0) != in1_early && !in1->in(0)->is_Root()) {
+        tty->print("n->in(%d)->in(0): ", i); in1->in(0)->dump();
+      }
+      for (uint j = 1; j < in1->req(); j++) {
+        Node* in2 = in1->in(j);
+        if (in2 != NULL && in2 != n && in2 != in1 && !in2->is_top()) {
+          tty->print("n->in(%d)->in(%d): ", i, j); in2->dump();
+          Node* in2_early = get_ctrl(in2);
+          tty->print("early(n->in(%d)->in(%d)): ", i, j); in2_early->dump();
+          if (in2->in(0) != NULL     && !in2->in(0)->is_top() &&
+              in2->in(0) != in2_early && !in2->in(0)->is_Root()) {
+            tty->print("n->in(%d)->in(%d)->in(0): ", i, j); in2->in(0)->dump();
+          }
+        }
+      }
+    }
+  }
+  tty->cr();
+  tty->print("LCA(n): "); LCA->dump();
+  for (uint i = 0; i < n->outcnt(); i++) { // Users of n and, one level further, their users
+    Node* u1 = n->raw_out(i);
+    if (u1 == n)
+      continue;
+    tty->print("n->out(%d): ", i); u1->dump();
+    if (u1->is_CFG()) { // CFG user: list only its CFG users
+      for (uint j = 0; j < u1->outcnt(); j++) {
+        Node* u2 = u1->raw_out(j);
+        if (u2 != u1 && u2 != n && u2->is_CFG()) {
+          tty->print("n->out(%d)->out(%d): ", i, j); u2->dump();
+        }
+      }
+    } else { // Non-CFG user: show its control and all of its users
+      Node* u1_later = get_ctrl(u1);
+      tty->print("later(n->out(%d)): ", i); u1_later->dump();
+      if (u1->in(0) != NULL     && !u1->in(0)->is_top() &&
+          u1->in(0) != u1_later && !u1->in(0)->is_Root()) {
+        tty->print("n->out(%d)->in(0): ", i); u1->in(0)->dump();
+      }
+      for (uint j = 0; j < u1->outcnt(); j++) {
+        Node* u2 = u1->raw_out(j);
+        if (u2 == n || u2 == u1)
+          continue;
+        tty->print("n->out(%d)->out(%d): ", i, j); u2->dump();
+        if (!u2->is_CFG()) {
+          Node* u2_later = get_ctrl(u2);
+          tty->print("later(n->out(%d)->out(%d)): ", i, j); u2_later->dump();
+          if (u2->in(0) != NULL     && !u2->in(0)->is_top() &&
+              u2->in(0) != u2_later && !u2->in(0)->is_Root()) {
+            tty->print("n->out(%d)->out(%d)->in(0): ", i, j); u2->in(0)->dump(); // Label names the second-level user (i, j), not the first-level one
+          }
+        }
+      }
+    }
+  }
+  tty->cr();
+  int ct = 0;
+  Node *dbg_legal = LCA;
+  while(!dbg_legal->is_Start() && ct < 100) { // Walk the idom chain up from LCA, at most 100 steps
+    tty->print("idom[%d] ",ct); dbg_legal->dump();
+    ct++;
+    dbg_legal = idom(dbg_legal);
+  }
+  tty->cr();
+}
+#endif
+
 #ifndef PRODUCT
 //------------------------------dump-------------------------------------------
 void PhaseIdealLoop::dump( ) const {
--- a/src/share/vm/opto/loopnode.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/loopnode.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1040,6 +1040,10 @@
   bool created_loop_node()     { return _created_loop_node; }
   void register_new_node( Node *n, Node *blk );
 
+#ifdef ASSERT
+  void dump_bad_graph(Node* n, Node* early, Node* LCA); // Debug builds only: prints diagnostics for a bad graph
+#endif
+
 #ifndef PRODUCT
   void dump( ) const;
   void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
--- a/src/share/vm/opto/loopopts.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/loopopts.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -819,6 +819,8 @@
     if( iff->is_If() ) {        // Classic split-if?
       if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
     } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
+      // Can't split CMove with different control edge.
+      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
       if( get_ctrl(iff->in(2)) == n_ctrl ||
           get_ctrl(iff->in(3)) == n_ctrl )
         return;                 // Inputs not yet split-up
@@ -937,7 +939,7 @@
       }
       bool did_break = (i < imax);  // Did we break out of the previous loop?
       if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
-        Node *late_load_ctrl;
+        Node *late_load_ctrl = NULL;
         if (n->is_Load()) {
           // If n is a load, get and save the result from get_late_ctrl(),
           // to be later used in calculating the control for n's clones.
--- a/src/share/vm/opto/macro.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/macro.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -234,11 +234,20 @@
     }
   } else {
     // G1 pre/post barriers
-    assert(p2x->outcnt() == 2, "expects 2 users: Xor and URShift nodes");
+    assert(p2x->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
     // It could be only one user, URShift node, in Object.clone() instrinsic
     // but the new allocation is passed to arraycopy stub and it could not
     // be scalar replaced. So we don't check the case.
 
+    // Another case of only one user (Xor) is when the value check for NULL
+    // in G1 post barrier is folded after CCP so the code which used URShift
+    // is removed.
+
+    // Take Region node before eliminating post barrier since it also
+    // eliminates CastP2X node when it has only one user.
+    Node* this_region = p2x->in(0);
+    assert(this_region != NULL, "");
+
     // Remove G1 post barrier.
 
     // Search for CastP2X->Xor->URShift->Cmp path which
@@ -263,8 +272,6 @@
     // Remove G1 pre barrier.
 
     // Search "if (marking != 0)" check and set it to "false".
-    Node* this_region = p2x->in(0);
-    assert(this_region != NULL, "");
     // There is no G1 pre barrier if previous stored value is NULL
     // (for example, after initialization).
     if (this_region->is_Region() && this_region->req() == 3) {
@@ -292,7 +299,7 @@
     }
     // Now CastP2X can be removed since it is used only on dead path
     // which currently still alive until igvn optimize it.
-    assert(p2x->unique_out()->Opcode() == Op_URShiftX, "");
+    assert(p2x->outcnt() == 0 || p2x->unique_out()->Opcode() == Op_URShiftX, "");
     _igvn.replace_node(p2x, top());
   }
 }
@@ -1088,6 +1095,12 @@
   Node* klass_node        = alloc->in(AllocateNode::KlassNode);
   Node* initial_slow_test = alloc->in(AllocateNode::InitialTest);
 
+  Node* storestore = alloc->storestore();
+  if (storestore != NULL) {
+    // Break this link that is no longer useful and confuses register allocation
+    storestore->set_req(MemBarNode::Precedent, top());
+  }
+
   assert(ctrl != NULL, "must have control");
   // We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
   // they will not be used if "always_slow" is set
@@ -1289,10 +1302,66 @@
                                    0, new_alloc_bytes, T_LONG);
     }
 
+    InitializeNode* init = alloc->initialization();
     fast_oop_rawmem = initialize_object(alloc,
                                         fast_oop_ctrl, fast_oop_rawmem, fast_oop,
                                         klass_node, length, size_in_bytes);
 
+    // If initialization is performed by an array copy, any required
+    // MemBarStoreStore was already added. If the object does not
+    // escape no need for a MemBarStoreStore. Otherwise we need a
+    // MemBarStoreStore so that stores that initialize this object
+    // can't be reordered with a subsequent store that makes this
+    // object accessible by other threads.
+    if (init == NULL || (!init->is_complete_with_arraycopy() && !init->does_not_escape())) {
+      if (init == NULL || init->req() < InitializeNode::RawStores) {
+        // No InitializeNode or no stores captured by zeroing
+        // elimination. Simply add the MemBarStoreStore after object
+        // initialization.
+        MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot, fast_oop_rawmem);
+        transform_later(mb);
+
+        mb->init_req(TypeFunc::Memory, fast_oop_rawmem);
+        mb->init_req(TypeFunc::Control, fast_oop_ctrl);
+        fast_oop_ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control);
+        transform_later(fast_oop_ctrl);
+        fast_oop_rawmem = new (C, 1) ProjNode(mb,TypeFunc::Memory);
+        transform_later(fast_oop_rawmem);
+      } else {
+        // Add the MemBarStoreStore after the InitializeNode so that
+        // all stores performing the initialization that were moved
+        // before the InitializeNode happen before the storestore
+        // barrier.
+
+        Node* init_ctrl = init->proj_out(TypeFunc::Control);
+        Node* init_mem = init->proj_out(TypeFunc::Memory);
+
+        MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot);
+        transform_later(mb);
+
+        Node* ctrl = new (C, 1) ProjNode(init,TypeFunc::Control);
+        transform_later(ctrl);
+        Node* mem = new (C, 1) ProjNode(init,TypeFunc::Memory);
+        transform_later(mem);
+
+        // The MemBarStoreStore depends on control and memory coming
+        // from the InitializeNode
+        mb->init_req(TypeFunc::Memory, mem);
+        mb->init_req(TypeFunc::Control, ctrl);
+
+        ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control);
+        transform_later(ctrl);
+        mem = new (C, 1) ProjNode(mb,TypeFunc::Memory);
+        transform_later(mem);
+
+        // All nodes that depended on the InitializeNode for control
+        // and memory must now depend on the MemBarNode that itself
+        // depends on the InitializeNode
+        _igvn.replace_node(init_ctrl, ctrl);
+        _igvn.replace_node(init_mem, mem);
+      }
+    }
+
     if (C->env()->dtrace_extended_probes()) {
       // Slow-path call
       int size = TypeFunc::Parms + 2;
@@ -1326,6 +1395,7 @@
     result_phi_rawmem->init_req(fast_result_path, fast_oop_rawmem);
   } else {
     slow_region = ctrl;
+    result_phi_i_o = i_o; // Rename it to use in the following code.
   }
 
   // Generate slow-path call
@@ -1350,6 +1420,10 @@
   copy_call_debug_info((CallNode *) alloc,  call);
   if (!always_slow) {
     call->set_cnt(PROB_UNLIKELY_MAG(4));  // Same effect as RC_UNCOMMON.
+  } else {
+    // Hook i_o projection to avoid its elimination during allocation
+    // replacement (when only a slow call is generated).
+    call->set_req(TypeFunc::I_O, result_phi_i_o);
   }
   _igvn.replace_node(alloc, call);
   transform_later(call);
@@ -1366,8 +1440,10 @@
   //
   extract_call_projections(call);
 
-  // An allocate node has separate memory projections for the uses on the control and i_o paths
-  // Replace uses of the control memory projection with result_phi_rawmem (unless we are only generating a slow call)
+  // An allocate node has separate memory projections for the uses on
+  // the control and i_o paths. Replace the control memory projection with
+  // result_phi_rawmem (unless we are only generating a slow call, in which
+  // case both memory projections are combined).
   if (!always_slow && _memproj_fallthrough != NULL) {
     for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) {
       Node *use = _memproj_fallthrough->fast_out(i);
@@ -1378,8 +1454,8 @@
       --i;
     }
   }
-  // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete _memproj_catchall so
-  // we end up with a call that has only 1 memory projection
+  // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete
+  // _memproj_catchall so we end up with a call that has only 1 memory projection.
   if (_memproj_catchall != NULL ) {
     if (_memproj_fallthrough == NULL) {
       _memproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::Memory);
@@ -1393,17 +1469,18 @@
       // back up iterator
       --i;
     }
+    assert(_memproj_catchall->outcnt() == 0, "all uses must be deleted");
+    _igvn.remove_dead_node(_memproj_catchall);
   }
 
-  // An allocate node has separate i_o projections for the uses on the control and i_o paths
-  // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call)
-  if (_ioproj_fallthrough == NULL) {
-    _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O);
-    transform_later(_ioproj_fallthrough);
-  } else if (!always_slow) {
+  // An allocate node has separate i_o projections for the uses on the control
+  // and i_o paths. Always replace the control i_o projection with the result
+  // i_o; otherwise the incoming i_o becomes dead when only a slow call is
+  // generated (this differs from the memory projections, which are combined
+  // in that case).
+  if (_ioproj_fallthrough != NULL) {
     for (DUIterator_Fast imax, i = _ioproj_fallthrough->fast_outs(imax); i < imax; i++) {
       Node *use = _ioproj_fallthrough->fast_out(i);
-
       _igvn.hash_delete(use);
       imax -= replace_input(use, _ioproj_fallthrough, result_phi_i_o);
       _igvn._worklist.push(use);
@@ -1411,9 +1488,13 @@
       --i;
     }
   }
-  // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete _ioproj_catchall so
-  // we end up with a call that has only 1 control projection
+  // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete
+  // _ioproj_catchall so we end up with a call that has only 1 i_o projection.
   if (_ioproj_catchall != NULL ) {
+    if (_ioproj_fallthrough == NULL) {
+      _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O);
+      transform_later(_ioproj_fallthrough);
+    }
     for (DUIterator_Fast imax, i = _ioproj_catchall->fast_outs(imax); i < imax; i++) {
       Node *use = _ioproj_catchall->fast_out(i);
       _igvn.hash_delete(use);
@@ -1422,11 +1503,25 @@
       // back up iterator
       --i;
     }
+    assert(_ioproj_catchall->outcnt() == 0, "all uses must be deleted");
+    _igvn.remove_dead_node(_ioproj_catchall);
   }
 
   // if we generated only a slow call, we are done
-  if (always_slow)
+  if (always_slow) {
+    // Now we can unhook i_o.
+    if (result_phi_i_o->outcnt() > 1) {
+      call->set_req(TypeFunc::I_O, top());
+    } else {
+      assert(result_phi_i_o->unique_ctrl_out() == call, "");
+      // Case of a new array whose negative size is known at compile time.
+      // The AllocateArrayNode::Ideal() optimization disconnects the unreachable
+      // following code, since the call to the runtime will throw an exception.
+      // As a result there will be no users of i_o after the call.
+      // Leave i_o attached to this call to avoid problems in the preceding graph.
+    }
     return;
+  }
 
 
   if (_fallthroughcatchproj != NULL) {
@@ -1470,7 +1565,7 @@
   Node* mark_node = NULL;
   // For now only enable fast locking for non-array types
   if (UseBiasedLocking && (length == NULL)) {
-    mark_node = make_load(control, rawmem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeRawPtr::BOTTOM, T_ADDRESS);
+    mark_node = make_load(control, rawmem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeRawPtr::BOTTOM, T_ADDRESS);
   } else {
     mark_node = makecon(TypeRawPtr::make((address)markOopDesc::prototype()));
   }
@@ -1701,7 +1796,8 @@
                          slow_call_address);
 }
 
-//-----------------------mark_eliminated_locking_nodes-----------------------
+//-------------------mark_eliminated_box----------------------------------
+//
 // During EA obj may point to several objects but after few ideal graph
 // transformations (CCP) it may point to only one non escaping object
 // (but still using phi), corresponding locks and unlocks will be marked
@@ -1712,62 +1808,148 @@
 // marked for elimination since new obj has no escape information.
 // Mark all associated (same box and obj) lock and unlock nodes for
 // elimination if some of them marked already.
-void PhaseMacroExpand::mark_eliminated_locking_nodes(AbstractLockNode *alock) {
-  if (!alock->is_eliminated()) {
+void PhaseMacroExpand::mark_eliminated_box(Node* oldbox, Node* obj) {
+  if (oldbox->as_BoxLock()->is_eliminated())
+    return; // This BoxLock node was processed already.
+
+  // The new implementation (EliminateNestedLocks) has a separate BoxLock
+  // node for each locked region, so mark all associated locks/unlocks as
+  // eliminated even if different objects are referenced in one locked region
+  // (for example, OSR compilation of a nested loop inside a locked scope).
+  if (EliminateNestedLocks ||
+      oldbox->as_BoxLock()->is_simple_lock_region(NULL, obj)) {
+    // Box is used only in one lock region. Mark this box as eliminated.
+    _igvn.hash_delete(oldbox);
+    oldbox->as_BoxLock()->set_eliminated(); // This changes box's hash value
+    _igvn.hash_insert(oldbox);
+
+    for (uint i = 0; i < oldbox->outcnt(); i++) {
+      Node* u = oldbox->raw_out(i);
+      if (u->is_AbstractLock() && !u->as_AbstractLock()->is_non_esc_obj()) {
+        AbstractLockNode* alock = u->as_AbstractLock();
+        // Check lock's box since box could be referenced by Lock's debug info.
+        if (alock->box_node() == oldbox) {
+          // Mark eliminated all related locks and unlocks.
+          alock->set_non_esc_obj();
+        }
+      }
+    }
     return;
   }
-  if (!alock->is_coarsened()) { // Eliminated by EA
-      // Create new "eliminated" BoxLock node and use it
-      // in monitor debug info for the same object.
-      BoxLockNode* oldbox = alock->box_node()->as_BoxLock();
-      Node* obj = alock->obj_node();
-      if (!oldbox->is_eliminated()) {
-        BoxLockNode* newbox = oldbox->clone()->as_BoxLock();
+
+  // Create new "eliminated" BoxLock node and use it in monitor debug info
+  // instead of oldbox for the same object.
+  BoxLockNode* newbox = oldbox->clone()->as_BoxLock();
+
+  // Note: BoxLock node is marked eliminated only here and it is used
+  // to indicate that all associated lock and unlock nodes are marked
+  // for elimination.
+  newbox->set_eliminated();
+  transform_later(newbox);
+
+  // Replace old box node with new box for all users of the same object.
+  for (uint i = 0; i < oldbox->outcnt();) {
+    bool next_edge = true;
+
+    Node* u = oldbox->raw_out(i);
+    if (u->is_AbstractLock()) {
+      AbstractLockNode* alock = u->as_AbstractLock();
+      if (alock->box_node() == oldbox && alock->obj_node()->eqv_uncast(obj)) {
+        // Replace Box and mark eliminated all related locks and unlocks.
+        alock->set_non_esc_obj();
+        _igvn.hash_delete(alock);
+        alock->set_box_node(newbox);
+        _igvn._worklist.push(alock);
+        next_edge = false;
+      }
+    }
+    if (u->is_FastLock() && u->as_FastLock()->obj_node()->eqv_uncast(obj)) {
+      FastLockNode* flock = u->as_FastLock();
+      assert(flock->box_node() == oldbox, "sanity");
+      _igvn.hash_delete(flock);
+      flock->set_box_node(newbox);
+      _igvn._worklist.push(flock);
+      next_edge = false;
+    }
+
+    // Replace old box in monitor debug info.
+    if (u->is_SafePoint() && u->as_SafePoint()->jvms()) {
+      SafePointNode* sfn = u->as_SafePoint();
+      JVMState* youngest_jvms = sfn->jvms();
+      int max_depth = youngest_jvms->depth();
+      for (int depth = 1; depth <= max_depth; depth++) {
+        JVMState* jvms = youngest_jvms->of_depth(depth);
+        int num_mon  = jvms->nof_monitors();
+        // Loop over monitors
+        for (int idx = 0; idx < num_mon; idx++) {
+          Node* obj_node = sfn->monitor_obj(jvms, idx);
+          Node* box_node = sfn->monitor_box(jvms, idx);
+          if (box_node == oldbox && obj_node->eqv_uncast(obj)) {
+            int j = jvms->monitor_box_offset(idx);
+            _igvn.hash_delete(u);
+            u->set_req(j, newbox);
+            _igvn._worklist.push(u);
+            next_edge = false;
+          }
+        }
+      }
+    }
+    if (next_edge) i++;
+  }
+}
+
+//-----------------------mark_eliminated_locking_nodes-----------------------
+void PhaseMacroExpand::mark_eliminated_locking_nodes(AbstractLockNode *alock) {
+  if (EliminateNestedLocks) {
+    if (alock->is_nested()) {
+       assert(alock->box_node()->as_BoxLock()->is_eliminated(), "sanity");
+       return;
+    } else if (!alock->is_non_esc_obj()) { // Not eliminated or coarsened
+      // Only Lock node has JVMState needed here.
+      if (alock->jvms() != NULL && alock->as_Lock()->is_nested_lock_region()) {
+        // Mark eliminated related nested locks and unlocks.
+        Node* obj = alock->obj_node();
+        BoxLockNode* box_node = alock->box_node()->as_BoxLock();
+        assert(!box_node->is_eliminated(), "should not be marked yet");
         // Note: BoxLock node is marked eliminated only here
         // and it is used to indicate that all associated lock
         // and unlock nodes are marked for elimination.
-        newbox->set_eliminated();
-        transform_later(newbox);
-        // Replace old box node with new box for all users
-        // of the same object.
-        for (uint i = 0; i < oldbox->outcnt();) {
-
-          bool next_edge = true;
-          Node* u = oldbox->raw_out(i);
-          if (u->is_AbstractLock() &&
-              u->as_AbstractLock()->obj_node() == obj &&
-              u->as_AbstractLock()->box_node() == oldbox) {
-            // Mark all associated locks and unlocks.
-            u->as_AbstractLock()->set_eliminated();
-            _igvn.hash_delete(u);
-            u->set_req(TypeFunc::Parms + 1, newbox);
-            next_edge = false;
+        box_node->set_eliminated(); // Box's hash is always NO_HASH here
+        for (uint i = 0; i < box_node->outcnt(); i++) {
+          Node* u = box_node->raw_out(i);
+          if (u->is_AbstractLock()) {
+            alock = u->as_AbstractLock();
+            if (alock->box_node() == box_node) {
+              // Verify that this Box is referenced only by related locks.
+              assert(alock->obj_node()->eqv_uncast(obj), "");
+              // Mark all related locks and unlocks.
+              alock->set_nested();
+            }
           }
-          // Replace old box in monitor debug info.
-          if (u->is_SafePoint() && u->as_SafePoint()->jvms()) {
-            SafePointNode* sfn = u->as_SafePoint();
-            JVMState* youngest_jvms = sfn->jvms();
-            int max_depth = youngest_jvms->depth();
-            for (int depth = 1; depth <= max_depth; depth++) {
-              JVMState* jvms = youngest_jvms->of_depth(depth);
-              int num_mon  = jvms->nof_monitors();
-              // Loop over monitors
-              for (int idx = 0; idx < num_mon; idx++) {
-                Node* obj_node = sfn->monitor_obj(jvms, idx);
-                Node* box_node = sfn->monitor_box(jvms, idx);
-                if (box_node == oldbox && obj_node == obj) {
-                  int j = jvms->monitor_box_offset(idx);
-                  _igvn.hash_delete(u);
-                  u->set_req(j, newbox);
-                  next_edge = false;
-                }
-              } // for (int idx = 0;
-            } // for (int depth = 1;
-          } // if (u->is_SafePoint()
-          if (next_edge) i++;
-        } // for (uint i = 0; i < oldbox->outcnt();)
-      } // if (!oldbox->is_eliminated())
-  } // if (!alock->is_coarsened())
+        }
+      }
+      return;
+    }
+    // Process locks for non escaping object
+    assert(alock->is_non_esc_obj(), "");
+  } // EliminateNestedLocks
+
+  if (alock->is_non_esc_obj()) { // Lock is used for non escaping object
+    // Look for all locks of this object and mark them and
+    // corresponding BoxLock nodes as eliminated.
+    Node* obj = alock->obj_node();
+    for (uint j = 0; j < obj->outcnt(); j++) {
+      Node* o = obj->raw_out(j);
+      if (o->is_AbstractLock() &&
+          o->as_AbstractLock()->obj_node()->eqv_uncast(obj)) {
+        alock = o->as_AbstractLock();
+        Node* box = alock->box_node();
+        // Replace old box node with new eliminated box for all users
+        // of the same object and mark related locks as eliminated.
+        mark_eliminated_box(box, obj);
+      }
+    }
+  }
 }
 
 // we have determined that this lock/unlock can be eliminated, we simply
@@ -1782,7 +1964,7 @@
     return false;
   }
 #ifdef ASSERT
-  if (alock->is_Lock() && !alock->is_coarsened()) {
+  if (!alock->is_coarsened()) {
     // Check that new "eliminated" BoxLock node is created.
     BoxLockNode* oldbox = alock->box_node()->as_BoxLock();
     assert(oldbox->is_eliminated(), "should be done already");
@@ -1874,6 +2056,8 @@
   Node* box = lock->box_node();
   Node* flock = lock->fastlock_node();
 
+  assert(!box->as_BoxLock()->is_eliminated(), "sanity");
+
   // Make the merge point
   Node *region;
   Node *mem_phi;
@@ -1958,7 +2142,7 @@
 #endif
       klass_node->init_req(0, ctrl);
     }
-    Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type());
+    Node *proto_node = make_load(ctrl, mem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeX_X, TypeX_X->basic_type());
 
     Node* thread = transform_later(new (C, 1) ThreadLocalNode());
     Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
@@ -2108,6 +2292,8 @@
   Node* obj = unlock->obj_node();
   Node* box = unlock->box_node();
 
+  assert(!box->as_BoxLock()->is_eliminated(), "sanity");
+
   // No need for a null check on unlock
 
   // Make the merge point
--- a/src/share/vm/opto/macro.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/macro.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -92,6 +92,7 @@
   void process_users_of_allocation(AllocateNode *alloc);
 
   void eliminate_card_mark(Node *cm);
+  void mark_eliminated_box(Node* box, Node* obj);
   void mark_eliminated_locking_nodes(AbstractLockNode *alock);
   bool eliminate_locking_node(AbstractLockNode *alock);
   void expand_lock_node(LockNode *lock);
--- a/src/share/vm/opto/matcher.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/matcher.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1365,31 +1365,36 @@
 
   const Type *t = m->bottom_type();
 
-  if( t->singleton() ) {
+  if (t->singleton()) {
     // Never force constants into registers.  Allow them to match as
     // constants or registers.  Copies of the same value will share
     // the same register.  See find_shared_node.
     return false;
   } else {                      // Not a constant
     // Stop recursion if they have different Controls.
-    // Slot 0 of constants is not really a Control.
-    if( control && m->in(0) && control != m->in(0) ) {
+    Node* m_control = m->in(0);
+    // The control of a load's memory can post-dominate the load's control,
+    // so use it, since a load can't float above its memory.
+    Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : NULL;
+    if (control && m_control && control != m_control && control != mem_control) {
 
       // Actually, we can live with the most conservative control we
       // find, if it post-dominates the others.  This allows us to
       // pick up load/op/store trees where the load can float a little
       // above the store.
       Node *x = control;
-      const uint max_scan = 6;   // Arbitrary scan cutoff
+      const uint max_scan = 6;  // Arbitrary scan cutoff
       uint j;
-      for( j=0; j<max_scan; j++ ) {
-        if( x->is_Region() )    // Bail out at merge points
+      for (j=0; j<max_scan; j++) {
+        if (x->is_Region())     // Bail out at merge points
           return true;
         x = x->in(0);
-        if( x == m->in(0) )     // Does 'control' post-dominate
+        if (x == m_control)     // Does 'control' post-dominate
           break;                // m->in(0)?  If so, we can use it
+        if (x == mem_control)   // Does 'control' post-dominate
+          break;                // mem_control?  If so, we can use it
       }
-      if( j == max_scan )       // No post-domination before scan end?
+      if (j == max_scan)        // No post-domination before scan end?
         return true;            // Then break the match tree up
     }
     if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) {
--- a/src/share/vm/opto/memnode.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/memnode.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1473,19 +1473,19 @@
 const Type*
 LoadNode::load_array_final_field(const TypeKlassPtr *tkls,
                                  ciKlass* klass) const {
-  if (tkls->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+  if (tkls->offset() == in_bytes(Klass::modifier_flags_offset())) {
     // The field is Klass::_modifier_flags.  Return its (constant) value.
     // (Folds up the 2nd indirection in aClassConstant.getModifiers().)
     assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags");
     return TypeInt::make(klass->modifier_flags());
   }
-  if (tkls->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+  if (tkls->offset() == in_bytes(Klass::access_flags_offset())) {
     // The field is Klass::_access_flags.  Return its (constant) value.
     // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).)
     assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags");
     return TypeInt::make(klass->access_flags());
   }
-  if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)) {
+  if (tkls->offset() == in_bytes(Klass::layout_helper_offset())) {
     // The field is Klass::_layout_helper.  Return its constant value if known.
     assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper");
     return TypeInt::make(klass->layout_helper());
@@ -1636,14 +1636,14 @@
       // We are loading a field from a Klass metaobject whose identity
       // is known at compile time (the type is "exact" or "precise").
       // Check for fields we know are maintained as constants by the VM.
-      if (tkls->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) {
+      if (tkls->offset() == in_bytes(Klass::super_check_offset_offset())) {
         // The field is Klass::_super_check_offset.  Return its (constant) value.
         // (Folds up type checking code.)
         assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset");
         return TypeInt::make(klass->super_check_offset());
       }
       // Compute index into primary_supers array
-      juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+      juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop);
       // Check for overflowing; use unsigned compare to handle the negative case.
       if( depth < ciKlass::primary_super_limit() ) {
         // The field is an element of Klass::_primary_supers.  Return its (constant) value.
@@ -1654,14 +1654,14 @@
       }
       const Type* aift = load_array_final_field(tkls, klass);
       if (aift != NULL)  return aift;
-      if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) + (int)sizeof(oopDesc)
+      if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset())
           && klass->is_array_klass()) {
         // The field is arrayKlass::_component_mirror.  Return its (constant) value.
         // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.)
         assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror");
         return TypeInstPtr::make(klass->as_array_klass()->component_mirror());
       }
-      if (tkls->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) {
+      if (tkls->offset() == in_bytes(Klass::java_mirror_offset())) {
         // The field is Klass::_java_mirror.  Return its (constant) value.
         // (Folds up the 2nd indirection in anObjConstant.getClass().)
         assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror");
@@ -1679,7 +1679,7 @@
       if( inner->is_instance_klass() &&
           !inner->as_instance_klass()->flags().is_interface() ) {
         // Compute index into primary_supers array
-        juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+        juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop);
         // Check for overflowing; use unsigned compare to handle the negative case.
         if( depth < ciKlass::primary_super_limit() &&
             depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case
@@ -1695,7 +1695,7 @@
     // If the type is enough to determine that the thing is not an array,
     // we can give the layout_helper a positive interval type.
     // This will help short-circuit some reflective code.
-    if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)
+    if (tkls->offset() == in_bytes(Klass::layout_helper_offset())
         && !klass->is_array_klass() // not directly typed as an array
         && !klass->is_interface()  // specifically not Serializable & Cloneable
         && !klass->is_java_lang_Object()   // not the supertype of all T[]
@@ -1718,8 +1718,10 @@
   bool is_instance = (tinst != NULL) && tinst->is_known_instance_field();
   if (ReduceFieldZeroing || is_instance) {
     Node* value = can_see_stored_value(mem,phase);
-    if (value != NULL && value->is_Con())
+    if (value != NULL && value->is_Con()) {
+      assert(value->bottom_type()->higher_equal(_type),"sanity");
       return value->bottom_type();
+    }
   }
 
   if (is_instance) {
@@ -1759,6 +1761,20 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+const Type* LoadBNode::Value(PhaseTransform *phase) const {
+  Node* mem = in(MemNode::Memory);
+  Node* value = can_see_stored_value(mem,phase);
+  if (value != NULL && value->is_Con() &&
+      !value->bottom_type()->higher_equal(_type)) {
+    // If the input to the store does not fit with the load's result type,
+    // it must be truncated. We can't delay until Ideal call since
+    // a singleton Value is needed for split_thru_phi optimization.
+    int con = value->get_int();
+    return TypeInt::make((con << 24) >> 24);
+  }
+  return LoadNode::Value(phase);
+}
+
 //--------------------------LoadUBNode::Ideal-------------------------------------
 //
 //  If the previous store is to the same address as this load,
@@ -1775,6 +1791,20 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+const Type* LoadUBNode::Value(PhaseTransform *phase) const {
+  Node* mem = in(MemNode::Memory);
+  Node* value = can_see_stored_value(mem,phase);
+  if (value != NULL && value->is_Con() &&
+      !value->bottom_type()->higher_equal(_type)) {
+    // If the input to the store does not fit with the load's result type,
+    // it must be truncated. We can't delay until Ideal call since
+    // a singleton Value is needed for split_thru_phi optimization.
+    int con = value->get_int();
+    return TypeInt::make(con & 0xFF);
+  }
+  return LoadNode::Value(phase);
+}
+
 //--------------------------LoadUSNode::Ideal-------------------------------------
 //
 //  If the previous store is to the same address as this load,
@@ -1791,6 +1821,20 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+const Type* LoadUSNode::Value(PhaseTransform *phase) const {
+  Node* mem = in(MemNode::Memory);
+  Node* value = can_see_stored_value(mem,phase);
+  if (value != NULL && value->is_Con() &&
+      !value->bottom_type()->higher_equal(_type)) {
+    // If the input to the store does not fit with the load's result type,
+    // it must be truncated. We can't delay until Ideal call since
+    // a singleton Value is needed for split_thru_phi optimization.
+    int con = value->get_int();
+    return TypeInt::make(con & 0xFFFF);
+  }
+  return LoadNode::Value(phase);
+}
+
 //--------------------------LoadSNode::Ideal--------------------------------------
 //
 //  If the previous store is to the same address as this load,
@@ -1809,6 +1853,20 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+const Type* LoadSNode::Value(PhaseTransform *phase) const {
+  Node* mem = in(MemNode::Memory);
+  Node* value = can_see_stored_value(mem,phase);
+  if (value != NULL && value->is_Con() &&
+      !value->bottom_type()->higher_equal(_type)) {
+    // If the input to the store does not fit with the load's result type,
+    // it must be truncated. We can't delay until Ideal call since
+    // a singleton Value is needed for split_thru_phi optimization.
+    int con = value->get_int();
+    return TypeInt::make((con << 16) >> 16);
+  }
+  return LoadNode::Value(phase);
+}
+
 //=============================================================================
 //----------------------------LoadKlassNode::make------------------------------
 // Polymorphic factory method:
@@ -1938,7 +1996,7 @@
     if( !klass->is_loaded() )
       return _type;             // Bail out if not loaded
     if( klass->is_obj_array_klass() &&
-        (uint)tkls->offset() == objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)) {
+        tkls->offset() == in_bytes(objArrayKlass::element_klass_offset())) {
       ciKlass* elem = klass->as_obj_array_klass()->element_klass();
       // // Always returning precise element type is incorrect,
       // // e.g., element type could be object and array may contain strings
@@ -1949,7 +2007,7 @@
       return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/);
     }
     if( klass->is_instance_klass() && tkls->klass_is_exact() &&
-        (uint)tkls->offset() == Klass::super_offset_in_bytes() + sizeof(oopDesc)) {
+        tkls->offset() == in_bytes(Klass::super_offset())) {
       ciKlass* sup = klass->as_instance_klass()->super();
       // The field is Klass::_super.  Return its (constant) value.
       // (Folds up the 2nd indirection in aClassConstant.getSuperClass().)
@@ -2013,11 +2071,11 @@
               tkls->klass()->is_array_klass())
           && adr2->is_AddP()
           ) {
-        int mirror_field = Klass::java_mirror_offset_in_bytes();
+        int mirror_field = in_bytes(Klass::java_mirror_offset());
         if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
           mirror_field = in_bytes(arrayKlass::component_mirror_offset());
         }
-        if (tkls->offset() == mirror_field + (int)sizeof(oopDesc)) {
+        if (tkls->offset() == mirror_field) {
           return adr2->in(AddPNode::Base);
         }
       }
@@ -2201,7 +2259,7 @@
   // unsafe if I have intervening uses...  Also disallowed for StoreCM
   // since they must follow each StoreP operation.  Redundant StoreCMs
   // are eliminated just before matching in final_graph_reshape.
-  if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address) &&
+  if (mem->is_Store() && mem->in(MemNode::Address)->eqv_uncast(address) &&
       mem->Opcode() != Op_StoreCM) {
     // Looking at a dead closed cycle of memory?
     assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
@@ -2274,16 +2332,16 @@
 
   // Load then Store?  Then the Store is useless
   if (val->is_Load() &&
-      phase->eqv_uncast( val->in(MemNode::Address), adr ) &&
-      phase->eqv_uncast( val->in(MemNode::Memory ), mem ) &&
+      val->in(MemNode::Address)->eqv_uncast(adr) &&
+      val->in(MemNode::Memory )->eqv_uncast(mem) &&
       val->as_Load()->store_Opcode() == Opcode()) {
     return mem;
   }
 
   // Two stores in a row of the same value?
   if (mem->is_Store() &&
-      phase->eqv_uncast( mem->in(MemNode::Address), adr ) &&
-      phase->eqv_uncast( mem->in(MemNode::ValueIn), val ) &&
+      mem->in(MemNode::Address)->eqv_uncast(adr) &&
+      mem->in(MemNode::ValueIn)->eqv_uncast(val) &&
       mem->Opcode() == Opcode()) {
     return mem;
   }
@@ -2721,6 +2779,7 @@
   case Op_MemBarVolatile:  return new(C, len) MemBarVolatileNode(C, atp, pn);
   case Op_MemBarCPUOrder:  return new(C, len) MemBarCPUOrderNode(C, atp, pn);
   case Op_Initialize:      return new(C, len) InitializeNode(C,     atp, pn);
+  case Op_MemBarStoreStore: return new(C, len) MemBarStoreStoreNode(C,  atp, pn);
   default:                 ShouldNotReachHere(); return NULL;
   }
 }
@@ -2870,7 +2929,7 @@
 
 //---------------------------InitializeNode------------------------------------
 InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
-  : _is_complete(Incomplete),
+  : _is_complete(Incomplete), _does_not_escape(false),
     MemBarNode(C, adr_type, rawoop)
 {
   init_class_id(Class_Initialize);
--- a/src/share/vm/opto/memnode.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/memnode.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -215,6 +215,7 @@
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
   virtual int store_Opcode() const { return Op_StoreB; }
   virtual BasicType memory_type() const { return T_BYTE; }
 };
@@ -228,6 +229,7 @@
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
   virtual int store_Opcode() const { return Op_StoreB; }
   virtual BasicType memory_type() const { return T_BYTE; }
 };
@@ -241,10 +243,25 @@
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
   virtual int store_Opcode() const { return Op_StoreC; }
   virtual BasicType memory_type() const { return T_CHAR; }
 };
 
+//------------------------------LoadSNode--------------------------------------
+// Load a short (16bits signed) from memory
+class LoadSNode : public LoadNode {
+public:
+  LoadSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT )
+    : LoadNode(c,mem,adr,at,ti) {}
+  virtual int Opcode() const;
+  virtual uint ideal_reg() const { return Op_RegI; }
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+  virtual const Type *Value(PhaseTransform *phase) const;
+  virtual int store_Opcode() const { return Op_StoreC; }
+  virtual BasicType memory_type() const { return T_SHORT; }
+};
+
 //------------------------------LoadINode--------------------------------------
 // Load an integer from memory
 class LoadINode : public LoadNode {
@@ -433,19 +450,6 @@
 };
 
 
-//------------------------------LoadSNode--------------------------------------
-// Load a short (16bits signed) from memory
-class LoadSNode : public LoadNode {
-public:
-  LoadSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT )
-    : LoadNode(c,mem,adr,at,ti) {}
-  virtual int Opcode() const;
-  virtual uint ideal_reg() const { return Op_RegI; }
-  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
-  virtual int store_Opcode() const { return Op_StoreC; }
-  virtual BasicType memory_type() const { return T_SHORT; }
-};
-
 //------------------------------StoreNode--------------------------------------
 // Store value; requires Store, Address and Value
 class StoreNode : public MemNode {
@@ -918,6 +922,15 @@
   virtual int Opcode() const;
 };
 
+class MemBarStoreStoreNode: public MemBarNode {
+public:
+  MemBarStoreStoreNode(Compile* C, int alias_idx, Node* precedent)
+    : MemBarNode(C, alias_idx, precedent) {
+    init_class_id(Class_MemBarStoreStore);
+  }
+  virtual int Opcode() const;
+};
+
 // Ordering between a volatile store and a following volatile load.
 // Requires multi-CPU visibility?
 class MemBarVolatileNode: public MemBarNode {
@@ -950,6 +963,8 @@
   };
   int _is_complete;
 
+  bool _does_not_escape;
+
 public:
   enum {
     Control    = TypeFunc::Control,
@@ -989,6 +1004,9 @@
   void set_complete(PhaseGVN* phase);
   void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; }
 
+  bool does_not_escape() { return _does_not_escape; }
+  void set_does_not_escape() { _does_not_escape = true; }
+
 #ifdef ASSERT
   // ensure all non-degenerate stores are ordered and non-overlapping
   bool stores_are_sane(PhaseTransform* phase);
--- a/src/share/vm/opto/node.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/node.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -833,8 +833,20 @@
 
 //---------------------------uncast_helper-------------------------------------
 Node* Node::uncast_helper(const Node* p) {
-  uint max_depth = 3;
-  for (uint i = 0; i < max_depth; i++) {
+#ifdef ASSERT
+  uint depth_count = 0;
+  const Node* orig_p = p;
+#endif
+
+  while (true) {
+#ifdef ASSERT
+    if (depth_count >= K) {
+      orig_p->dump(4);
+      if (p != orig_p)
+        p->dump(1);
+    }
+    assert(depth_count++ < K, "infinite loop in Node::uncast_helper");
+#endif
     if (p == NULL || p->req() != 2) {
       break;
     } else if (p->is_ConstraintCast()) {
--- a/src/share/vm/opto/node.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/node.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -97,6 +97,7 @@
 class MachTempNode;
 class Matcher;
 class MemBarNode;
+class MemBarStoreStoreNode;
 class MemNode;
 class MergeMemNode;
 class MultiNode;
@@ -428,6 +429,10 @@
 
   // Strip away casting.  (It is depth-limited.)
   Node* uncast() const;
+  // Return whether two Nodes are equivalent, after stripping casting.
+  bool eqv_uncast(const Node* n) const {
+    return (this->uncast() == n->uncast());
+  }
 
 private:
   static Node* uncast_helper(const Node* n);
@@ -564,7 +569,8 @@
         DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
       DEFINE_CLASS_ID(Start,       Multi, 2)
       DEFINE_CLASS_ID(MemBar,      Multi, 3)
-        DEFINE_CLASS_ID(Initialize,    MemBar, 0)
+        DEFINE_CLASS_ID(Initialize,       MemBar, 0)
+        DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1)
 
     DEFINE_CLASS_ID(Mach,  Node, 1)
       DEFINE_CLASS_ID(MachReturn, Mach, 0)
@@ -744,6 +750,7 @@
   DEFINE_CLASS_QUERY(MachTemp)
   DEFINE_CLASS_QUERY(Mem)
   DEFINE_CLASS_QUERY(MemBar)
+  DEFINE_CLASS_QUERY(MemBarStoreStore)
   DEFINE_CLASS_QUERY(MergeMem)
   DEFINE_CLASS_QUERY(Multi)
   DEFINE_CLASS_QUERY(MultiBranch)
--- a/src/share/vm/opto/output.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/output.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -167,7 +167,7 @@
   // Determine if we need to generate a stack overflow check.
   // Do it if the method is not a stub function and
   // has java calls or has frame size > vm_page_size/8.
-  return (stub_function() == NULL &&
+  return (UseStackBanging && stub_function() == NULL &&
           (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3));
 }
 
@@ -924,10 +924,10 @@
         scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding());
       }
 
-      OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node);
+      OptoReg::Name box_reg = BoxLockNode::reg(box_node);
       Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg));
-      while( !box_node->is_BoxLock() )  box_node = box_node->in(1);
-      monarray->append(new MonitorValue(scval, basic_lock, box_node->as_BoxLock()->is_eliminated()));
+      bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated());
+      monarray->append(new MonitorValue(scval, basic_lock, eliminated));
     }
 
     // We dump the object pool first, since deoptimization reads it in first.
--- a/src/share/vm/opto/parse1.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/parse1.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1604,7 +1604,16 @@
           continue;
         default:                // All normal stuff
           if (phi == NULL) {
-            if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
+            const JVMState* jvms = map()->jvms();
+            if (EliminateNestedLocks &&
+                jvms->is_mon(j) && jvms->is_monitor_box(j)) {
+              // BoxLock nodes are not commoned.
+              // Use old BoxLock node as merged box.
+              assert(newin->jvms()->is_monitor_box(j), "sanity");
+              // This assert also tests that nodes are BoxLock.
+              assert(BoxLockNode::same_slot(n, m), "sanity");
+              C->gvn_replace_by(n, m);
+            } else if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
               phi = ensure_phi(j, nophi);
             }
           }
@@ -1911,7 +1920,7 @@
   Node* klass_addr = basic_plus_adr( receiver, receiver, oopDesc::klass_offset_in_bytes() );
   Node* klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), klass_addr, TypeInstPtr::KLASS) );
 
-  Node* access_flags_addr = basic_plus_adr(klass, klass, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+  Node* access_flags_addr = basic_plus_adr(klass, klass, in_bytes(Klass::access_flags_offset()));
   Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT);
 
   Node* mask  = _gvn.transform(new (C, 3) AndINode(access_flags, intcon(JVM_ACC_HAS_FINALIZER)));
--- a/src/share/vm/opto/parseHelper.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/parseHelper.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -71,14 +71,14 @@
   // Throw uncommon trap if class is not loaded or the value we are casting
   // _from_ is not loaded, and value is not null.  If the value _is_ NULL,
   // then the checkcast does nothing.
-  const TypeInstPtr *tp = _gvn.type(obj)->isa_instptr();
-  if (!will_link || (tp && !tp->is_loaded())) {
+  const TypeOopPtr *tp = _gvn.type(obj)->isa_oopptr();
+  if (!will_link || (tp && tp->klass() && !tp->klass()->is_loaded())) {
     if (C->log() != NULL) {
       if (!will_link) {
         C->log()->elem("assert_null reason='checkcast' klass='%d'",
                        C->log()->identify(klass));
       }
-      if (tp && !tp->is_loaded()) {
+      if (tp && tp->klass() && !tp->klass()->is_loaded()) {
         // %%% Cannot happen?
         C->log()->elem("assert_null reason='checkcast source' klass='%d'",
                        C->log()->identify(tp->klass()));
@@ -200,7 +200,7 @@
   // Come here for polymorphic array klasses
 
   // Extract the array element class
-  int element_klass_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+  int element_klass_offset = in_bytes(objArrayKlass::element_klass_offset());
   Node *p2 = basic_plus_adr(array_klass, array_klass, element_klass_offset);
   Node *a_e_klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p2, tak) );
 
@@ -220,7 +220,7 @@
   _gvn.set_type(merge, Type::CONTROL);
   Node* kls = makecon(TypeKlassPtr::make(klass));
 
-  Node* init_thread_offset = _gvn.MakeConX(instanceKlass::init_thread_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes());
+  Node* init_thread_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_thread_offset()));
   Node* adr_node = basic_plus_adr(kls, kls, init_thread_offset);
   Node* init_thread = make_load(NULL, adr_node, TypeRawPtr::BOTTOM, T_ADDRESS);
   Node *tst   = Bool( CmpP( init_thread, cur_thread), BoolTest::eq);
@@ -228,9 +228,11 @@
   set_control(IfTrue(iff));
   merge->set_req(1, IfFalse(iff));
 
-  Node* init_state_offset = _gvn.MakeConX(instanceKlass::init_state_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes());
+  Node* init_state_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_state_offset()));
   adr_node = basic_plus_adr(kls, kls, init_state_offset);
-  Node* init_state = make_load(NULL, adr_node, TypeInt::INT, T_INT);
+  // Use T_BOOLEAN for instanceKlass::_init_state so the compiler
+  // can generate code to load it as unsigned byte.
+  Node* init_state = make_load(NULL, adr_node, TypeInt::UBYTE, T_BOOLEAN);
   Node* being_init = _gvn.intcon(instanceKlass::being_initialized);
   tst   = Bool( CmpI( init_state, being_init), BoolTest::eq);
   iff = create_and_map_if(control(), tst, PROB_ALWAYS, COUNT_UNKNOWN);
--- a/src/share/vm/opto/phaseX.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/phaseX.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -256,11 +256,6 @@
   // For pessimistic optimizations this is simply pointer equivalence.
   bool eqv(const Node* n1, const Node* n2) const { return n1 == n2; }
 
-  // Return whether two Nodes are equivalent, after stripping casting.
-  bool eqv_uncast(const Node* n1, const Node* n2) const {
-    return eqv(n1->uncast(), n2->uncast());
-  }
-
   // For pessimistic passes, the return type must monotonically narrow.
   // For optimistic  passes, the return type must monotonically widen.
   // It is possible to get into a "death march" in either type of pass,
--- a/src/share/vm/opto/postaloc.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/postaloc.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -89,32 +89,62 @@
   return blk_adjust;
 }
 
+#ifdef ASSERT
+static bool expected_yanked_node(Node *old, Node *orig_old) {
+  // Yanking is expected only for the following original nodes:
+  // - load from constant table node, which may have these data input nodes:
+  //     MachConstantBase, Phi, MachTemp, MachSpillCopy
+  // - load constant node, which may have these data input nodes:
+  //     MachTemp, MachSpillCopy
+  // - MachSpillCopy
+  // - MachProj and Copy dead nodes
+  if (old->is_MachSpillCopy()) {
+    return true;
+  } else if (old->is_Con()) {
+    return true;
+  } else if (old->is_MachProj()) { // Dead kills projection of Con node
+    return (old == orig_old);
+  } else if (old->is_Copy()) {     // Dead copy of a callee-save value
+    return (old == orig_old);
+  } else if (old->is_MachTemp()) {
+    return orig_old->is_Con();
+  } else if (old->is_Phi() || old->is_MachConstantBase()) {
+    return (orig_old->is_Con() && orig_old->is_MachConstant());
+  }
+  return false;
+}
+#endif
+
 //------------------------------yank_if_dead-----------------------------------
-// Removed an edge from 'old'.  Yank if dead.  Return adjustment counts to
+// Removed edges from 'old'.  Yank if dead.  Return adjustment counts to
 // iterators in the current block.
-int PhaseChaitin::yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
+int PhaseChaitin::yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
+                                       Node_List *value, Node_List *regnd) {
   int blk_adjust=0;
-  while (old->outcnt() == 0 && old != C->top()) {
+  if (old->outcnt() == 0 && old != C->top()) {
+#ifdef ASSERT
+    if (!expected_yanked_node(old, orig_old)) {
+      tty->print_cr("==============================================");
+      tty->print_cr("orig_old:");
+      orig_old->dump();
+      tty->print_cr("old:");
+      old->dump();
+      assert(false, "unexpected yanked node");
+    }
+    if (old->is_Con())
+      orig_old = old; // Reset to satisfy expected nodes checks.
+#endif
     blk_adjust += yank(old, current_block, value, regnd);
 
-    Node *tmp = NULL;
     for (uint i = 1; i < old->req(); i++) {
-      if (old->in(i)->is_MachTemp()) {
-        // handle TEMP inputs
-        Node* machtmp = old->in(i);
-        if (machtmp->outcnt() == 1) {
-          assert(machtmp->unique_out() == old, "sanity");
-          blk_adjust += yank(machtmp, current_block, value, regnd);
-          machtmp->disconnect_inputs(NULL);
-        }
-      } else {
-        assert(tmp == NULL, "can't handle more non MachTemp inputs");
-        tmp = old->in(i);
+      Node* n = old->in(i);
+      if (n != NULL) {
+        old->set_req(i, NULL);
+        blk_adjust += yank_if_dead_recurse(n, orig_old, current_block, value, regnd);
       }
     }
+    // Disconnect control and remove precedence edges if any exist
     old->disconnect_inputs(NULL);
-    if( !tmp ) break;
-    old = tmp;
   }
   return blk_adjust;
 }
--- a/src/share/vm/opto/subnode.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/opto/subnode.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -91,7 +91,7 @@
 
   // Not correct for SubFnode and AddFNode (must check for infinity)
   // Equal?  Subtract is zero
-  if (phase->eqv_uncast(in1, in2))  return add_id();
+  if (in1->eqv_uncast(in2))  return add_id();
 
   // Either input is BOTTOM ==> the result is the local BOTTOM
   if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
--- a/src/share/vm/prims/jni.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jni.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -51,6 +51,7 @@
 #include "oops/typeArrayOop.hpp"
 #include "prims/jni.h"
 #include "prims/jniCheck.hpp"
+#include "prims/jniExport.hpp"
 #include "prims/jniFastGetField.hpp"
 #include "prims/jvm.h"
 #include "prims/jvm_misc.hpp"
@@ -69,6 +70,8 @@
 #include "runtime/signature.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/runtimeService.hpp"
+#include "trace/tracing.hpp"
+#include "trace/traceEventTypes.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
@@ -5040,16 +5043,25 @@
 
 #ifndef PRODUCT
 
+#include "gc_interface/collectedHeap.hpp"
 #include "utilities/quickSort.hpp"
 
+#define run_unit_test(unit_test_function_call)              \
+  tty->print_cr("Running test: " #unit_test_function_call); \
+  unit_test_function_call
+
 void execute_internal_vm_tests() {
   if (ExecuteInternalVMTests) {
-    assert(QuickSort::test_quick_sort(), "test_quick_sort failed");
-    assert(arrayOopDesc::test_max_array_length(), "test_max_array_length failed");
+    tty->print_cr("Running internal VM tests");
+    run_unit_test(arrayOopDesc::test_max_array_length());
+    run_unit_test(CollectedHeap::test_is_in());
+    run_unit_test(QuickSort::test_quick_sort());
     tty->print_cr("All internal VM tests passed");
   }
 }
 
+#undef run_unit_test
+
 #endif
 
 #ifndef USDT2
@@ -5140,6 +5152,10 @@
        JvmtiExport::post_thread_start(thread);
     }
 
+    EVENT_BEGIN(TraceEventThreadStart, event);
+    EVENT_COMMIT(event,
+        EVENT_SET(event, javalangthread, java_lang_Thread::thread_id(thread->threadObj())));
+
     // Check if we should compile all classes on bootclasspath
     NOT_PRODUCT(if (CompileTheWorld) ClassLoader::compile_the_world();)
     // Since this is not a JVM_ENTRY we have to set the thread state manually before leaving.
@@ -5338,6 +5354,10 @@
     JvmtiExport::post_thread_start(thread);
   }
 
+  EVENT_BEGIN(TraceEventThreadStart, event);
+  EVENT_COMMIT(event,
+      EVENT_SET(event, javalangthread, java_lang_Thread::thread_id(thread->threadObj())));
+
   *(JNIEnv**)penv = thread->jni_environment();
 
   // Now leaving the VM, so change thread_state. This is normally automatically taken care
@@ -5465,8 +5485,7 @@
     return ret;
   }
 
-  if (JvmtiExport::is_jvmti_version(version)) {
-    ret = JvmtiExport::get_jvmti_interface(vm, penv, version);
+  if (JniExportedInterface::GetExportedInterface(vm, penv, version, &ret)) {
     return ret;
   }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/prims/jniExport.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_PRIMS_JNI_EXPORT_HPP
+#define SHARE_VM_PRIMS_JNI_EXPORT_HPP
+
+#include "prims/jni.h"
+#include "prims/jvmtiExport.hpp"
+
+class JniExportedInterface {
+ public:
+  static bool GetExportedInterface(JavaVM* vm, void** penv, jint version, jint* iface) {
+    if (JvmtiExport::is_jvmti_version(version)) {
+      *iface = JvmtiExport::get_jvmti_interface(vm, penv, version);
+      return true;
+    }
+    return false;
+  }
+};
+
+#endif // SHARE_VM_PRIMS_JNI_EXPORT_HPP
--- a/src/share/vm/prims/jvm.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jvm.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2716,7 +2716,9 @@
   }
   oop java_thread = JNIHandles::resolve_non_null(jthread);
   JavaThread* receiver = java_lang_Thread::thread(java_thread);
-  Events::log("JVM_StopThread thread JavaThread " INTPTR_FORMAT " as oop " INTPTR_FORMAT " [exception " INTPTR_FORMAT "]", receiver, (address)java_thread, throwable);
+  Events::log_exception(JavaThread::current(),
+                        "JVM_StopThread thread JavaThread " INTPTR_FORMAT " as oop " INTPTR_FORMAT " [exception " INTPTR_FORMAT "]",
+                        receiver, (address)java_thread, throwable);
   // First check if thread is alive
   if (receiver != NULL) {
     // Check if exception is getting thrown at self (use oop equality, since the
@@ -3515,14 +3517,14 @@
 JVM_LEAF(jint, JVM_Recv(jint fd, char *buf, jint nBytes, jint flags))
   JVMWrapper2("JVM_Recv (0x%x)", fd);
   //%note jvm_r6
-  return os::recv(fd, buf, nBytes, flags);
+  return os::recv(fd, buf, (size_t)nBytes, (uint)flags);
 JVM_END
 
 
 JVM_LEAF(jint, JVM_Send(jint fd, char *buf, jint nBytes, jint flags))
   JVMWrapper2("JVM_Send (0x%x)", fd);
   //%note jvm_r6
-  return os::send(fd, buf, nBytes, flags);
+  return os::send(fd, buf, (size_t)nBytes, (uint)flags);
 JVM_END
 
 
@@ -3543,42 +3545,51 @@
 JVM_LEAF(jint, JVM_Connect(jint fd, struct sockaddr *him, jint len))
   JVMWrapper2("JVM_Connect (0x%x)", fd);
   //%note jvm_r6
-  return os::connect(fd, him, len);
+  return os::connect(fd, him, (socklen_t)len);
 JVM_END
 
 
 JVM_LEAF(jint, JVM_Bind(jint fd, struct sockaddr *him, jint len))
   JVMWrapper2("JVM_Bind (0x%x)", fd);
   //%note jvm_r6
-  return os::bind(fd, him, len);
+  return os::bind(fd, him, (socklen_t)len);
 JVM_END
 
 
 JVM_LEAF(jint, JVM_Accept(jint fd, struct sockaddr *him, jint *len))
   JVMWrapper2("JVM_Accept (0x%x)", fd);
   //%note jvm_r6
-  return os::accept(fd, him, (int *)len);
+  socklen_t socklen = (socklen_t)(*len);
+  jint result = os::accept(fd, him, &socklen);
+  *len = (jint)socklen;
+  return result;
 JVM_END
 
 
 JVM_LEAF(jint, JVM_RecvFrom(jint fd, char *buf, int nBytes, int flags, struct sockaddr *from, int *fromlen))
   JVMWrapper2("JVM_RecvFrom (0x%x)", fd);
   //%note jvm_r6
-  return os::recvfrom(fd, buf, nBytes, flags, from, fromlen);
+  socklen_t socklen = (socklen_t)(*fromlen);
+  jint result = os::recvfrom(fd, buf, (size_t)nBytes, (uint)flags, from, &socklen);
+  *fromlen = (int)socklen;
+  return result;
 JVM_END
 
 
 JVM_LEAF(jint, JVM_GetSockName(jint fd, struct sockaddr *him, int *len))
   JVMWrapper2("JVM_GetSockName (0x%x)", fd);
   //%note jvm_r6
-  return os::get_sock_name(fd, him, len);
+  socklen_t socklen = (socklen_t)(*len);
+  jint result = os::get_sock_name(fd, him, &socklen);
+  *len = (int)socklen;
+  return result;
 JVM_END
 
 
 JVM_LEAF(jint, JVM_SendTo(jint fd, char *buf, int len, int flags, struct sockaddr *to, int tolen))
   JVMWrapper2("JVM_SendTo (0x%x)", fd);
   //%note jvm_r6
-  return os::sendto(fd, buf, len, flags, to, tolen);
+  return os::sendto(fd, buf, (size_t)len, (uint)flags, to, (socklen_t)tolen);
 JVM_END
 
 
@@ -3592,21 +3603,26 @@
 JVM_LEAF(jint, JVM_GetSockOpt(jint fd, int level, int optname, char *optval, int *optlen))
   JVMWrapper2("JVM_GetSockOpt (0x%x)", fd);
   //%note jvm_r6
-  return os::get_sock_opt(fd, level, optname, optval, optlen);
+  socklen_t socklen = (socklen_t)(*optlen);
+  jint result = os::get_sock_opt(fd, level, optname, optval, &socklen);
+  *optlen = (int)socklen;
+  return result;
 JVM_END
 
 
 JVM_LEAF(jint, JVM_SetSockOpt(jint fd, int level, int optname, const char *optval, int optlen))
   JVMWrapper2("JVM_GetSockOpt (0x%x)", fd);
   //%note jvm_r6
-  return os::set_sock_opt(fd, level, optname, optval, optlen);
+  return os::set_sock_opt(fd, level, optname, optval, (socklen_t)optlen);
 JVM_END
 
+
 JVM_LEAF(int, JVM_GetHostName(char* name, int namelen))
   JVMWrapper("JVM_GetHostName");
   return os::get_host_name(name, namelen);
 JVM_END
 
+
 // Library support ///////////////////////////////////////////////////////////////////////////
 
 JVM_ENTRY_NO_ENV(void*, JVM_LoadLibrary(const char* name))
@@ -3647,6 +3663,7 @@
   return os::dll_lookup(handle, name);
 JVM_END
 
+
 // Floating point support ////////////////////////////////////////////////////////////////////
 
 JVM_LEAF(jboolean, JVM_IsNaN(jdouble a))
@@ -3655,7 +3672,6 @@
 JVM_END
 
 
-
 // JNI version ///////////////////////////////////////////////////////////////////////////////
 
 JVM_LEAF(jboolean, JVM_IsSupportedJNIVersion(jint version))
--- a/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jvmtiClassFileReconstituter.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -43,7 +43,7 @@
 #ifdef TARGET_ARCH_ppc
 # include "bytes_ppc.hpp"
 #endif
-// FIXME: add Deprecated, LVT, LVTT attributes
+// FIXME: add Deprecated, LVTT attributes
 // FIXME: fix Synthetic attribute
 // FIXME: per Serguei, add error return handling for constantPoolOopDesc::copy_cpool_bytes()
 
@@ -136,8 +136,9 @@
   constMethodHandle const_method(thread(), method->constMethod());
   u2 line_num_cnt = 0;
   int stackmap_len = 0;
+  int local_variable_table_length = 0;
 
-  // compute number and length of attributes -- FIXME: for now no LVT
+  // compute number and length of attributes
   int attr_count = 0;
   int attr_size = 0;
   if (const_method->has_linenumber_table()) {
@@ -170,6 +171,25 @@
       attr_size += 2 + 4 + stackmap_len;
     }
   }
+  if (method->has_localvariable_table()) {
+    local_variable_table_length = method->localvariable_table_length();
+    ++attr_count;
+    if (local_variable_table_length != 0) {
+      // Compute the size of the local variable table attribute (VM stores raw):
+      // LocalVariableTable_attribute {
+      //   u2 attribute_name_index;
+      //   u4 attribute_length;
+      //   u2 local_variable_table_length;
+      //   {
+      //     u2 start_pc;
+      //     u2 length;
+      //     u2 name_index;
+      //     u2 descriptor_index;
+      //     u2 index;
+      //   }
+      attr_size += 2 + 4 + 2 + local_variable_table_length * (2 + 2 + 2 + 2 + 2);
+    }
+  }
 
   typeArrayHandle exception_table(thread(), const_method->exception_table());
   int exception_table_length = exception_table->length();
@@ -203,8 +223,9 @@
   if (stackmap_len != 0) {
     write_stackmap_table_attribute(method, stackmap_len);
   }
-
-  // FIXME: write LVT attribute
+  if (local_variable_table_length != 0) {
+    write_local_variable_table_attribute(method, local_variable_table_length);
+  }
 }
 
 // Write Exceptions attribute
@@ -371,6 +392,36 @@
   }
 }
 
+// Write LocalVariableTable attribute
+// JVMSpec|   LocalVariableTable_attribute {
+// JVMSpec|     u2 attribute_name_index;
+// JVMSpec|     u4 attribute_length;
+// JVMSpec|     u2 local_variable_table_length;
+// JVMSpec|     {  u2 start_pc;
+// JVMSpec|       u2 length;
+// JVMSpec|       u2 name_index;
+// JVMSpec|       u2 descriptor_index;
+// JVMSpec|       u2 index;
+// JVMSpec|     } local_variable_table[local_variable_table_length];
+// JVMSpec|   }
+void JvmtiClassFileReconstituter::write_local_variable_table_attribute(methodHandle method, u2 num_entries) {
+    write_attribute_name_index("LocalVariableTable");
+    write_u4(2 + num_entries * (2 + 2 + 2 + 2 + 2));
+    write_u2(num_entries);
+
+    assert(method->localvariable_table_length() == num_entries, "just checking");
+
+    LocalVariableTableElement *elem = method->localvariable_table_start();
+    for (int j=0; j<method->localvariable_table_length(); j++) {
+      write_u2(elem->start_bci);
+      write_u2(elem->length);
+      write_u2(elem->name_cp_index);
+      write_u2(elem->descriptor_cp_index);
+      write_u2(elem->slot);
+      elem++;
+    }
+}
+
 // Write stack map table attribute
 // JSR-202|   StackMapTable_attribute {
 // JSR-202|     u2 attribute_name_index;
--- a/src/share/vm/prims/jvmtiClassFileReconstituter.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jvmtiClassFileReconstituter.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -119,6 +119,7 @@
   void write_source_debug_extension_attribute();
   u2 line_number_table_entries(methodHandle method);
   void write_line_number_table_attribute(methodHandle method, u2 num_entries);
+  void write_local_variable_table_attribute(methodHandle method, u2 num_entries);
   void write_stackmap_table_attribute(methodHandle method, int stackmap_table_len);
   u2 inner_classes_attribute_length();
   void write_inner_classes_attribute(int length);
--- a/src/share/vm/prims/jvmtiEnv.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -267,7 +267,10 @@
 
     instanceKlassHandle ikh(current_thread, k_oop);
     if (ikh->get_cached_class_file_bytes() == NULL) {
-      // not cached, we need to reconstitute the class file from VM representation
+      // Not cached, we need to reconstitute the class file from the
+      // VM representation. We don't attach the reconstituted class
+      // bytes to the instanceKlass here because they have not been
+      // validated and we're not at a safepoint.
       constantPoolHandle  constants(current_thread, ikh->constants());
       ObjectLocker ol(constants, current_thread);    // lock constant pool while we query it
 
--- a/src/share/vm/prims/jvmtiExport.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jvmtiExport.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -538,8 +538,6 @@
     _curr_env = NULL;
     _cached_length_ptr = cached_length_ptr;
     _cached_data_ptr = cached_data_ptr;
-    *_cached_length_ptr = 0;
-    *_cached_data_ptr = NULL;
 
     _state = _thread->jvmti_thread_state();
     if (_state != NULL) {
--- a/src/share/vm/prims/jvmtiRedefineClasses.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jvmtiRedefineClasses.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -854,8 +854,9 @@
 
     // RC_TRACE_WITH_THREAD macro has an embedded ResourceMark
     RC_TRACE_WITH_THREAD(0x00000001, THREAD,
-      ("loading name=%s (avail_mem=" UINT64_FORMAT "K)",
-      the_class->external_name(), os::available_memory() >> 10));
+      ("loading name=%s kind=%d (avail_mem=" UINT64_FORMAT "K)",
+      the_class->external_name(), _class_load_kind,
+      os::available_memory() >> 10));
 
     ClassFileStream st((u1*) _class_defs[i].class_bytes,
       _class_defs[i].class_byte_count, (char *)"__VM_RedefineClasses__");
@@ -3205,8 +3206,20 @@
   // with them was cached on the scratch class, move to the_class.
   // Note: we still want to do this if nothing needed caching since it
   // should get cleared in the_class too.
-  the_class->set_cached_class_file(scratch_class->get_cached_class_file_bytes(),
-                                   scratch_class->get_cached_class_file_len());
+  if (the_class->get_cached_class_file_bytes() == 0) {
+    // the_class doesn't have a cache yet so copy it
+    the_class->set_cached_class_file(
+      scratch_class->get_cached_class_file_bytes(),
+      scratch_class->get_cached_class_file_len());
+  }
+#ifndef PRODUCT
+  else {
+    assert(the_class->get_cached_class_file_bytes() ==
+      scratch_class->get_cached_class_file_bytes(), "cache ptrs must match");
+    assert(the_class->get_cached_class_file_len() ==
+      scratch_class->get_cached_class_file_len(), "cache lens must match");
+  }
+#endif
 
   // Replace inner_classes
   typeArrayOop old_inner_classes = the_class->inner_classes();
--- a/src/share/vm/prims/jvmtiTagMap.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jvmtiTagMap.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2999,7 +2999,8 @@
     char type = field->field_type();
     if (!is_primitive_field_type(type)) {
       oop fld_o = o->obj_field(field->field_offset());
-      if (fld_o != NULL) {
+      // ignore any objects that aren't visible to profiler
+      if (fld_o != NULL && ServiceUtil::visible_oop(fld_o)) {
         // reflection code may have a reference to a klassOop.
         // - see sun.reflect.UnsafeStaticFieldAccessorImpl and sun.misc.Unsafe
         if (fld_o->is_klass()) {
--- a/src/share/vm/prims/jvmtiThreadState.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jvmtiThreadState.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -319,6 +319,15 @@
   // clearing the flag indicates we are done with the PopFrame() dance
   clr_pending_step_for_popframe();
 
+  // If exception was thrown in this frame, need to reset jvmti thread state.
+  // Single stepping may not get enabled correctly by the agent since
+  // exception state is passed in MethodExit event which may be sent at some
+  // time in the future. JDWP agent ignores MethodExit events if caused by
+  // an exception.
+  //
+  if (is_exception_detected()) {
+    clear_exception_detected();
+  }
   // If step is pending for popframe then it may not be
   // a repeat step. The new_bci and method_id is same as current_bci
   // and current method_id after pop and step for recursive calls.
@@ -385,6 +394,15 @@
   // the ForceEarlyReturn() dance
   clr_pending_step_for_earlyret();
 
+  // If exception was thrown in this frame, need to reset jvmti thread state.
+  // Single stepping may not get enabled correctly by the agent since
+  // exception state is passed in MethodExit event which may be sent at some
+  // time in the future. JDWP agent ignores MethodExit events if caused by
+  // an exception.
+  //
+  if (is_exception_detected()) {
+    clear_exception_detected();
+  }
   // If step is pending for earlyret then it may not be a repeat step.
   // The new_bci and method_id is same as current_bci and current
   // method_id after earlyret and step for recursive calls.
--- a/src/share/vm/prims/jvmtiThreadState.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/jvmtiThreadState.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -165,6 +165,10 @@
   inline bool is_exception_caught()    { return _exception_caught;  }
   inline void set_exception_detected() { _exception_detected = true;
                                          _exception_caught = false; }
+  inline void clear_exception_detected() {
+    _exception_detected = false;
+    assert(_exception_caught == false, "_exception_caught is out of phase");
+  }
   inline void set_exception_caught()   { _exception_caught = true;
                                          _exception_detected = false; }
 
--- a/src/share/vm/prims/methodHandles.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/methodHandles.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -194,9 +194,6 @@
 // MethodHandles::generate_adapters
 //
 void MethodHandles::generate_adapters() {
-#ifdef TARGET_ARCH_NYI_6939861
-  if (FLAG_IS_DEFAULT(UseRicochetFrames))  UseRicochetFrames = false;
-#endif
   if (!EnableInvokeDynamic || SystemDictionary::MethodHandle_klass() == NULL)  return;
 
   assert(_adapter_code == NULL, "generate only once");
@@ -230,18 +227,6 @@
 }
 
 
-#ifdef TARGET_ARCH_NYI_6939861
-// these defs belong in methodHandles_<arch>.cpp
-frame MethodHandles::ricochet_frame_sender(const frame& fr, RegisterMap *map) {
-  ShouldNotCallThis();
-  return fr;
-}
-void MethodHandles::ricochet_frame_oops_do(const frame& fr, OopClosure* f, const RegisterMap* reg_map) {
-  ShouldNotCallThis();
-}
-#endif //TARGET_ARCH_NYI_6939861
-
-
 //------------------------------------------------------------------------------
 // MethodHandles::ek_supported
 //
@@ -251,28 +236,11 @@
   case _adapter_unused_13:
     return false;  // not defined yet
   case _adapter_prim_to_ref:
-    return UseRicochetFrames && conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF);
+    return conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF);
   case _adapter_collect_args:
-    return UseRicochetFrames && conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS);
+    return conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS);
   case _adapter_fold_args:
-    return UseRicochetFrames && conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS);
-  case _adapter_opt_return_any:
-    return UseRicochetFrames;
-#ifdef TARGET_ARCH_NYI_6939861
-  // ports before 6939861 supported only three kinds of spread ops
-  case _adapter_spread_args:
-    // restrict spreads to three kinds:
-    switch (ek) {
-    case _adapter_opt_spread_0:
-    case _adapter_opt_spread_1:
-    case _adapter_opt_spread_more:
-      break;
-    default:
-      return false;
-      break;
-    }
-    break;
-#endif //TARGET_ARCH_NYI_6939861
+    return conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS);
   }
   return true;
 }
@@ -1988,9 +1956,6 @@
     case _adapter_prim_to_ref:    // boxer MH to use
     case _adapter_collect_args:   // method handle which collects the args
     case _adapter_fold_args:      // method handle which collects the args
-      if (!UseRicochetFrames) {
-        { err = "box/collect/fold operators are not supported"; break; }
-      }
       if (!java_lang_invoke_MethodHandle::is_instance(argument()))
         { err = "MethodHandle adapter argument required"; break; }
       arg_mtype = Handle(THREAD, java_lang_invoke_MethodHandle::type(argument()));
@@ -2370,7 +2335,6 @@
 
   case _adapter_prim_to_ref:
     {
-      assert(UseRicochetFrames, "else don't come here");
       // vminfo will be the location to insert the return value
       vminfo = argslot;
       ek_opt = _adapter_opt_collect_ref;
@@ -2436,20 +2400,6 @@
 
   case _adapter_spread_args:
     {
-#ifdef TARGET_ARCH_NYI_6939861
-      // ports before 6939861 supported only three kinds of spread ops
-      if (!UseRicochetFrames) {
-        int array_size   = slots_pushed + 1;
-        assert(array_size >= 0, "");
-        vminfo = array_size;
-        switch (array_size) {
-        case 0:   ek_opt = _adapter_opt_spread_0;       break;
-        case 1:   ek_opt = _adapter_opt_spread_1;       break;
-        default:  ek_opt = _adapter_opt_spread_more;    break;
-        }
-        break;
-      }
-#endif //TARGET_ARCH_NYI_6939861
       // vminfo will be the required length of the array
       int array_size = (slots_pushed + 1) / (type2size[dest] == 2 ? 2 : 1);
       vminfo = array_size;
@@ -2494,7 +2444,6 @@
 
   case _adapter_collect_args:
     {
-      assert(UseRicochetFrames, "else don't come here");
       int elem_slots = argument_slot_count(java_lang_invoke_MethodHandle::type(argument()));
       // vminfo will be the location to insert the return value
       vminfo = argslot;
@@ -2563,7 +2512,6 @@
 
   case _adapter_fold_args:
     {
-      assert(UseRicochetFrames, "else don't come here");
       int elem_slots = argument_slot_count(java_lang_invoke_MethodHandle::type(argument()));
       // vminfo will be the location to insert the return value
       vminfo = argslot + elem_slots;
--- a/src/share/vm/prims/methodHandles.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/methodHandles.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -738,46 +738,6 @@
 #ifdef TARGET_ARCH_ppc
 # include "methodHandles_ppc.hpp"
 #endif
-
-#ifdef TARGET_ARCH_NYI_6939861
-  // Here are some backward compatible declarations until the 6939861 ports are updated.
-  #define _adapter_flyby    (_EK_LIMIT + 10)
-  #define _adapter_ricochet (_EK_LIMIT + 11)
-  #define _adapter_opt_spread_1    _adapter_opt_spread_1_ref
-  #define _adapter_opt_spread_more _adapter_opt_spread_ref
-  enum {
-    _INSERT_NO_MASK   = -1,
-    _INSERT_REF_MASK  = 0,
-    _INSERT_INT_MASK  = 1,
-    _INSERT_LONG_MASK = 3
-  };
-  static void get_ek_bound_mh_info(EntryKind ek, BasicType& arg_type, int& arg_mask, int& arg_slots) {
-    arg_type = ek_bound_mh_arg_type(ek);
-    arg_mask = 0;
-    arg_slots = type2size[arg_type];;
-  }
-  static void get_ek_adapter_opt_swap_rot_info(EntryKind ek, int& swap_bytes, int& rotate) {
-    int swap_slots = ek_adapter_opt_swap_slots(ek);
-    rotate = ek_adapter_opt_swap_mode(ek);
-    swap_bytes = swap_slots * Interpreter::stackElementSize;
-  }
-  static int get_ek_adapter_opt_spread_info(EntryKind ek) {
-    return ek_adapter_opt_spread_count(ek);
-  }
-
-  static void insert_arg_slots(MacroAssembler* _masm,
-                               RegisterOrConstant arg_slots,
-                               int arg_mask,
-                               Register argslot_reg,
-                               Register temp_reg, Register temp2_reg, Register temp3_reg = noreg);
-
-  static void remove_arg_slots(MacroAssembler* _masm,
-                               RegisterOrConstant arg_slots,
-                               Register argslot_reg,
-                               Register temp_reg, Register temp2_reg, Register temp3_reg = noreg);
-
-  static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
-#endif //TARGET_ARCH_NYI_6939861
 };
 
 
--- a/src/share/vm/prims/nativeLookup.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/nativeLookup.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -91,6 +91,19 @@
 }
 
 
+char* NativeLookup::critical_jni_name(methodHandle method) {  // yields "JavaCritical_" + mangled klass name + "_" + mangled method name
+  stringStream st;
+  // Fixed prefix used for the critical variant of a native entry point
+  st.print("JavaCritical_");
+  // Mangled name of the klass that declares the method
+  mangle_name_on(&st, method->klass_name());
+  st.print("_");
+  // Mangled name of the method itself
+  mangle_name_on(&st, method->name());
+  return st.as_string();  // NOTE(review): presumably resource-allocated; lookup_critical_entry holds a ResourceMark around the call - confirm
+}
+
+
 char* NativeLookup::long_jni_name(methodHandle method) {
   // Signature ignore the wrapping parenteses and the trailing return type
   stringStream st;
@@ -193,6 +206,34 @@
 }
 
 
+address NativeLookup::lookup_critical_style(methodHandle method, char* pure_name, const char* long_name, int args_size, bool os_style) {
+  if (!method->has_native_function()) {  // no bound native entry to start the library search from
+    return NULL;
+  }
+
+  address current_entry = method->native_function();
+
+  char dll_name[JVM_MAXPATHLEN];
+  int offset;  // filled in by the call below but not used afterwards
+  if (os::dll_address_to_library_name(current_entry, dll_name, sizeof(dll_name), &offset)) {  // map the current entry address to a library name
+    char ebuf[32];  // error text from dll_load; never reported by this function
+    void* dll = os::dll_load(dll_name, ebuf, sizeof(ebuf));
+    if (dll != NULL) {
+      // Compute complete JNI name for style: [os prefix] pure_name long_name [os suffix]
+      stringStream st;
+      if (os_style) os::print_jni_name_prefix_on(&st, args_size);
+      st.print_raw(pure_name);
+      st.print_raw(long_name);
+      if (os_style) os::print_jni_name_suffix_on(&st, args_size);
+      char* jni_name = st.as_string();
+      return (address)os::dll_lookup(dll, jni_name);  // result of the symbol lookup is returned as-is
+    }
+  }
+
+  return NULL;  // library name could not be determined or the library could not be loaded
+}
+
+
 // Check all the formats of native implementation name to see if there is one
 // for the specified method.
 address NativeLookup::lookup_entry(methodHandle method, bool& in_base_library, TRAPS) {
@@ -228,6 +269,58 @@
   return entry; // NULL indicates not found
 }
 
+// Look for a "JavaCritical_" variant of the native implementation of the
+// specified method, trying each supported name format; NULL if none is found.
+address NativeLookup::lookup_critical_entry(methodHandle method) {
+  if (!CriticalJNINatives) return NULL;  // lookup is skipped entirely when the flag is off
+
+  if (method->is_synchronized() ||
+      !method->is_static()) {
+    // Only static non-synchronized methods are allowed
+    return NULL;
+  }
+
+  ResourceMark rm;  // NOTE(review): presumably scopes the name strings built below - confirm
+  address entry = NULL;
+
+  Symbol* signature = method->signature();
+  for (int end = 0; end < signature->utf8_length(); end++) {  // scan every byte of the signature
+    if (signature->byte_at(end) == 'L') {
+      // Don't allow object types
+      return NULL;
+    }
+  }
+
+  // Compute critical name
+  char* critical_name = critical_jni_name(method);
+
+  // Compute argument size
+  int args_size = 1                             // JNIEnv
+                + (method->is_static() ? 1 : 0) // class for static methods
+                + method->size_of_parameters(); // actual parameters
+
+
+  // 1) Try JNI short style
+  entry = lookup_critical_style(method, critical_name, "",        args_size, true);
+  if (entry != NULL) return entry;
+
+  // Compute long name
+  char* long_name = long_jni_name(method);
+
+  // 2) Try JNI long style
+  entry = lookup_critical_style(method, critical_name, long_name, args_size, true);
+  if (entry != NULL) return entry;
+
+  // 3) Try JNI short style without os prefix/suffix
+  entry = lookup_critical_style(method, critical_name, "",        args_size, false);
+  if (entry != NULL) return entry;
+
+  // 4) Try JNI long style without os prefix/suffix
+  entry = lookup_critical_style(method, critical_name, long_name, args_size, false);
+
+  return entry; // NULL indicates not found
+}
+
 // Check if there are any JVM TI prefixes which have been applied to the native method name.
 // If any are found, remove them before attemping the look up of the
 // native implementation again.
--- a/src/share/vm/prims/nativeLookup.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/prims/nativeLookup.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,15 +36,18 @@
   // JNI name computation
   static char* pure_jni_name(methodHandle method);
   static char* long_jni_name(methodHandle method);
+  static char* critical_jni_name(methodHandle method);
 
   // Style specific lookup
   static address lookup_style(methodHandle method, char* pure_name, const char* long_name, int args_size, bool os_style, bool& in_base_library, TRAPS);
+  static address lookup_critical_style(methodHandle method, char* pure_name, const char* long_name, int args_size, bool os_style);
   static address lookup_base (methodHandle method, bool& in_base_library, TRAPS);
   static address lookup_entry(methodHandle method, bool& in_base_library, TRAPS);
   static address lookup_entry_prefixed(methodHandle method, bool& in_base_library, TRAPS);
  public:
   // Lookup native function. May throw UnsatisfiedLinkError.
   static address lookup(methodHandle method, bool& in_base_library, TRAPS);
+  static address lookup_critical_entry(methodHandle method);
 
   // Lookup native functions in base library.
   static address base_library_lookup(const char* class_name, const char* method_name, const char* signature);
--- a/src/share/vm/runtime/advancedThresholdPolicy.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -156,20 +156,19 @@
 // Called with the queue locked and with at least one element
 CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
   CompileTask *max_task = NULL;
-  methodOop max_method;
+  methodHandle max_method;
   jlong t = os::javaTimeMillis();
   // Iterate through the queue and find a method with a maximum rate.
   for (CompileTask* task = compile_queue->first(); task != NULL;) {
     CompileTask* next_task = task->next();
-    methodOop method = (methodOop)JNIHandles::resolve(task->method_handle());
-    methodDataOop mdo = method->method_data();
-    update_rate(t, method);
+    methodHandle method = (methodOop)JNIHandles::resolve(task->method_handle());
+    update_rate(t, method());
     if (max_task == NULL) {
       max_task = task;
       max_method = method;
     } else {
       // If a method has been stale for some time, remove it from the queue.
-      if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
+      if (is_stale(t, TieredCompileTaskTimeout, method()) && !is_old(method())) {
         if (PrintTieredEvents) {
           print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level());
         }
@@ -181,7 +180,7 @@
       }
 
       // Select a method with a higher rate
-      if (compare_methods(method, max_method)) {
+      if (compare_methods(method(), max_method())) {
         max_task = task;
         max_method = method;
       }
@@ -190,7 +189,7 @@
   }
 
   if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile
-      && is_method_profiled(max_method)) {
+      && is_method_profiled(max_method())) {
     max_task->set_comp_level(CompLevel_limited_profile);
     if (PrintTieredEvents) {
       print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
@@ -272,13 +271,10 @@
 }
 
 // Create MDO if necessary.
-void AdvancedThresholdPolicy::create_mdo(methodHandle mh, TRAPS) {
+void AdvancedThresholdPolicy::create_mdo(methodHandle mh, JavaThread* THREAD) {
   if (mh->is_native() || mh->is_abstract() || mh->is_accessor()) return;
   if (mh->method_data() == NULL) {
-    methodOopDesc::build_interpreter_method_data(mh, THREAD);
-    if (HAS_PENDING_EXCEPTION) {
-      CLEAR_PENDING_EXCEPTION;
-    }
+    methodOopDesc::build_interpreter_method_data(mh, CHECK_AND_CLEAR);
   }
 }
 
@@ -427,22 +423,22 @@
 }
 
 // Update the rate and submit compile
-void AdvancedThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS) {
+void AdvancedThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread) {
   int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count();
   update_rate(os::javaTimeMillis(), mh());
-  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", THREAD);
+  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", thread);
 }
 
 // Handle the invocation event.
 void AdvancedThresholdPolicy::method_invocation_event(methodHandle mh, methodHandle imh,
-                                                      CompLevel level, nmethod* nm, TRAPS) {
+                                                      CompLevel level, nmethod* nm, JavaThread* thread) {
   if (should_create_mdo(mh(), level)) {
-    create_mdo(mh, THREAD);
+    create_mdo(mh, thread);
   }
   if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, InvocationEntryBci)) {
     CompLevel next_level = call_event(mh(), level);
     if (next_level != level) {
-      compile(mh, InvocationEntryBci, next_level, THREAD);
+      compile(mh, InvocationEntryBci, next_level, thread);
     }
   }
 }
@@ -450,13 +446,13 @@
 // Handle the back branch event. Notice that we can compile the method
 // with a regular entry from here.
 void AdvancedThresholdPolicy::method_back_branch_event(methodHandle mh, methodHandle imh,
-                                                       int bci, CompLevel level, nmethod* nm, TRAPS) {
+                                                       int bci, CompLevel level, nmethod* nm, JavaThread* thread) {
   if (should_create_mdo(mh(), level)) {
-    create_mdo(mh, THREAD);
+    create_mdo(mh, thread);
   }
   // Check if MDO should be created for the inlined method
   if (should_create_mdo(imh(), level)) {
-    create_mdo(imh, THREAD);
+    create_mdo(imh, thread);
   }
 
   if (is_compilation_enabled()) {
@@ -464,7 +460,7 @@
     CompLevel max_osr_level = (CompLevel)imh->highest_osr_comp_level();
     // At the very least compile the OSR version
     if (!CompileBroker::compilation_is_in_queue(imh, bci) && next_osr_level != level) {
-      compile(imh, bci, next_osr_level, THREAD);
+      compile(imh, bci, next_osr_level, thread);
     }
 
     // Use loop event as an opportunity to also check if there's been
@@ -503,14 +499,14 @@
           next_level = CompLevel_full_profile;
         }
         if (cur_level != next_level) {
-          compile(mh, InvocationEntryBci, next_level, THREAD);
+          compile(mh, InvocationEntryBci, next_level, thread);
         }
       }
     } else {
       cur_level = comp_level(imh());
       next_level = call_event(imh(), cur_level);
       if (!CompileBroker::compilation_is_in_queue(imh, bci) && next_level != cur_level) {
-        compile(imh, InvocationEntryBci, next_level, THREAD);
+        compile(imh, InvocationEntryBci, next_level, thread);
       }
     }
   }
--- a/src/share/vm/runtime/advancedThresholdPolicy.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/advancedThresholdPolicy.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -197,7 +197,7 @@
   // determines whether we should do that.
   inline bool should_create_mdo(methodOop method, CompLevel cur_level);
   // Create MDO if necessary.
-  void create_mdo(methodHandle mh, TRAPS);
+  void create_mdo(methodHandle mh, JavaThread* thread);
   // Is method profiled enough?
   bool is_method_profiled(methodOop method);
 
@@ -208,12 +208,12 @@
   jlong start_time() const     { return _start_time; }
 
   // Submit a given method for compilation (and update the rate).
-  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS);
+  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread);
   // event() from SimpleThresholdPolicy would call these.
   virtual void method_invocation_event(methodHandle method, methodHandle inlinee,
-                                       CompLevel level, nmethod* nm, TRAPS);
+                                       CompLevel level, nmethod* nm, JavaThread* thread);
   virtual void method_back_branch_event(methodHandle method, methodHandle inlinee,
-                                        int bci, CompLevel level, nmethod* nm, TRAPS);
+                                        int bci, CompLevel level, nmethod* nm, JavaThread* thread);
 public:
   AdvancedThresholdPolicy() : _start_time(0) { }
   // Select task is called by CompileBroker. We should return a task or NULL.
--- a/src/share/vm/runtime/arguments.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -109,8 +109,6 @@
 char* Arguments::_meta_index_path = NULL;
 char* Arguments::_meta_index_dir = NULL;
 
-static bool force_client_mode = false;
-
 // Check if head of 'option' matches 'name', and sets 'tail' remaining part of option string
 
 static bool match_option(const JavaVMOption *option, const char* name,
@@ -1018,6 +1016,13 @@
     UseInterpreter           = false;
     BackgroundCompilation    = false;
     ClipInlining             = false;
+    // Be much more aggressive in tiered mode with -Xcomp and exercise C2 more.
+    // We will first compile a level 3 version (C1 with full profiling), then do one invocation of it and
+    // compile a level 4 (C2) and then continue executing it.
+    if (TieredCompilation) {
+      Tier3InvokeNotifyFreqLog = 0;
+      Tier4InvocationThreshold = 0;
+    }
     break;
   }
 }
@@ -1051,6 +1056,16 @@
 }
 
 #ifndef KERNEL
+static void disable_adaptive_size_policy(const char* collector_name) {  // collector_name is used only in the warning text
+  if (UseAdaptiveSizePolicy) {  // nothing to do when the flag is already off
+    if (FLAG_IS_CMDLINE(UseAdaptiveSizePolicy)) {  // warn only if the flag came from the command line
+      warning("disabling UseAdaptiveSizePolicy; it is incompatible with %s.",
+              collector_name);
+    }
+    FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);  // turned off whether or not a warning was printed
+  }
+}
+
 // If the user has chosen ParallelGCThreads > 0, we set UseParNewGC
 // if it's not explictly set or unset. If the user has chosen
 // UseParNewGC and not explicitly set ParallelGCThreads we
@@ -1060,11 +1075,8 @@
          "control point invariant");
   assert(UseParNewGC, "Error");
 
-  // Turn off AdaptiveSizePolicy by default for parnew until it is
-  // complete.
-  if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
-    FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);
-  }
+  // Turn off AdaptiveSizePolicy for parnew until it is complete.
+  disable_adaptive_size_policy("UseParNewGC");
 
   if (ParallelGCThreads == 0) {
     FLAG_SET_DEFAULT(ParallelGCThreads,
@@ -1121,11 +1133,8 @@
     FLAG_SET_ERGO(bool, UseParNewGC, true);
   }
 
-  // Turn off AdaptiveSizePolicy by default for cms until it is
-  // complete.
-  if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) {
-    FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false);
-  }
+  // Turn off AdaptiveSizePolicy for CMS until it is complete.
+  disable_adaptive_size_policy("UseConcMarkSweepGC");
 
   // In either case, adjust ParallelGCThreads and/or UseParNewGC
   // as needed.
@@ -1352,7 +1361,7 @@
     return;
   }
 
-  if (os::is_server_class_machine() && !force_client_mode ) {
+  if (os::is_server_class_machine()) {
     // If no other collector is requested explicitly,
     // let the VM select the collector based on
     // machine class and automatic selection policy.
@@ -1377,12 +1386,9 @@
   // by ergonomics.
   if (MaxHeapSize <= max_heap_for_compressed_oops()) {
 #if !defined(COMPILER1) || defined(TIERED)
-// disable UseCompressedOops by default on MacOS X until 7118647 is fixed
-#ifndef __APPLE__
     if (FLAG_IS_DEFAULT(UseCompressedOops)) {
       FLAG_SET_ERGO(bool, UseCompressedOops, true);
     }
-#endif // !__APPLE__
 #endif
 #ifdef _WIN64
     if (UseLargePages && UseCompressedOops) {
@@ -1407,10 +1413,11 @@
 
 void Arguments::set_parallel_gc_flags() {
   assert(UseParallelGC || UseParallelOldGC, "Error");
-  // If parallel old was requested, automatically enable parallel scavenge.
-  if (UseParallelOldGC && !UseParallelGC && FLAG_IS_DEFAULT(UseParallelGC)) {
-    FLAG_SET_DEFAULT(UseParallelGC, true);
+  // Enable ParallelOld unless it was explicitly disabled (cmd line or rc file).
+  if (FLAG_IS_DEFAULT(UseParallelOldGC)) {
+    FLAG_SET_DEFAULT(UseParallelOldGC, true);
   }
+  FLAG_SET_DEFAULT(UseParallelGC, true);
 
   // If no heap maximum was requested explicitly, use some reasonable fraction
   // of the physical memory, up to a maximum of 1GB.
@@ -2435,7 +2442,7 @@
 #ifndef PRODUCT
     // -Xprintflags
     } else if (match_option(option, "-Xprintflags", &tail)) {
-      CommandLineFlags::printFlags();
+      CommandLineFlags::printFlags(tty, false);
       vm_exit(0);
 #endif
     // -D
@@ -3051,11 +3058,6 @@
   // Construct the path to the archive
   char jvm_path[JVM_MAXPATHLEN];
   os::jvm_path(jvm_path, sizeof(jvm_path));
-#ifdef TIERED
-  if (strstr(jvm_path, "client") != NULL) {
-    force_client_mode = true;
-  }
-#endif // TIERED
   char *end = strrchr(jvm_path, *os::file_separator());
   if (end != NULL) *end = '\0';
   char *shared_archive_path = NEW_C_HEAP_ARRAY(char, strlen(jvm_path) +
@@ -3094,13 +3096,13 @@
       IgnoreUnrecognizedVMOptions = false;
     }
     if (match_option(option, "-XX:+PrintFlagsInitial", &tail)) {
-      CommandLineFlags::printFlags();
+      CommandLineFlags::printFlags(tty, false);
       vm_exit(0);
     }
 
 #ifndef PRODUCT
     if (match_option(option, "-XX:+PrintFlagsWithComments", &tail)) {
-      CommandLineFlags::printFlags(true);
+      CommandLineFlags::printFlags(tty, true);
       vm_exit(0);
     }
 #endif
@@ -3276,6 +3278,9 @@
   if (!UseBiasedLocking || EmitSync != 0) {
     UseOptoBiasInlining = false;
   }
+  if (!EliminateLocks) {
+    EliminateNestedLocks = false;
+  }
 #endif
 
   if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
@@ -3293,7 +3298,7 @@
 #endif
 
   if (PrintCommandLineFlags) {
-    CommandLineFlags::printSetFlags();
+    CommandLineFlags::printSetFlags(tty);
   }
 
   // Apply CPU specific policy for the BiasedLocking
--- a/src/share/vm/runtime/compilationPolicy.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/compilationPolicy.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -60,11 +60,11 @@
     break;
 
   case 1:
-//#ifdef COMPILER2
+#if defined(COMPILER2) || defined(GRAAL)
     CompilationPolicy::set_policy(new StackWalkCompPolicy());
-//#else
-//    Unimplemented();
-//#endif
+#else
+    Unimplemented();
+#endif
     break;
   case 2:
 #ifdef TIERED
@@ -306,29 +306,27 @@
   return (current >= initial + target);
 }
 
-nmethod* NonTieredCompPolicy::event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS) {
+nmethod* NonTieredCompPolicy::event(methodHandle method, methodHandle inlinee, int branch_bci,
+                                    int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread) {
   assert(comp_level == CompLevel_none, "This should be only called from the interpreter");
   NOT_PRODUCT(trace_frequency_counter_overflow(method, branch_bci, bci));
-  if (JvmtiExport::can_post_interpreter_events()) {
-    assert(THREAD->is_Java_thread(), "Wrong type of thread");
-    if (((JavaThread*)THREAD)->is_interp_only_mode()) {
-      // If certain JVMTI events (e.g. frame pop event) are requested then the
-      // thread is forced to remain in interpreted code. This is
-      // implemented partly by a check in the run_compiled_code
-      // section of the interpreter whether we should skip running
-      // compiled code, and partly by skipping OSR compiles for
-      // interpreted-only threads.
-      if (bci != InvocationEntryBci) {
-        reset_counter_for_back_branch_event(method);
-        return NULL;
-      }
+  if (JvmtiExport::can_post_interpreter_events() && thread->is_interp_only_mode()) {
+    // If certain JVMTI events (e.g. frame pop event) are requested then the
+    // thread is forced to remain in interpreted code. This is
+    // implemented partly by a check in the run_compiled_code
+    // section of the interpreter whether we should skip running
+    // compiled code, and partly by skipping OSR compiles for
+    // interpreted-only threads.
+    if (bci != InvocationEntryBci) {
+      reset_counter_for_back_branch_event(method);
+      return NULL;
     }
   }
   if (bci == InvocationEntryBci) {
     // when code cache is full, compilation gets switched off, UseCompiler
     // is set to false
     if (!method->has_compiled_code() && UseCompiler) {
-      method_invocation_event(method, CHECK_NULL);
+      method_invocation_event(method, thread);
     } else {
       // Force counter overflow on method entry, even if no compilation
       // happened.  (The method_invocation_event call does this also.)
@@ -344,7 +342,7 @@
     NOT_PRODUCT(trace_osr_request(method, osr_nm, bci));
     // when code cache is full, we should not compile any more...
     if (osr_nm == NULL && UseCompiler) {
-      method_back_branch_event(method, bci, CHECK_NULL);
+      method_back_branch_event(method, bci, thread);
       osr_nm = method->lookup_osr_nmethod_for(bci, CompLevel_highest_tier, true);
     }
     if (osr_nm == NULL) {
@@ -395,7 +393,7 @@
 
 // SimpleCompPolicy - compile current method
 
-void SimpleCompPolicy::method_invocation_event( methodHandle m, TRAPS) {
+void SimpleCompPolicy::method_invocation_event(methodHandle m, JavaThread* thread) {
   int hot_count = m->invocation_count();
   reset_counter_for_invocation_event(m);
   const char* comment = "count";
@@ -405,36 +403,35 @@
     if (nm == NULL ) {
       const char* comment = "count";
       CompileBroker::compile_method(m, InvocationEntryBci, CompLevel_highest_tier,
-                                    m, hot_count, comment, CHECK);
+                                    m, hot_count, comment, thread);
     }
   }
 }
 
-void SimpleCompPolicy::method_back_branch_event(methodHandle m, int bci, TRAPS) {
+void SimpleCompPolicy::method_back_branch_event(methodHandle m, int bci, JavaThread* thread) {
   int hot_count = m->backedge_count();
   const char* comment = "backedge_count";
 
   if (is_compilation_enabled() && !m->is_not_osr_compilable() && can_be_compiled(m)) {
     CompileBroker::compile_method(m, bci, CompLevel_highest_tier,
-                                  m, hot_count, comment, CHECK);
+                                  m, hot_count, comment, thread);
     NOT_PRODUCT(trace_osr_completion(m->lookup_osr_nmethod_for(bci, CompLevel_highest_tier, true));)
   }
 }
 // StackWalkCompPolicy - walk up stack to find a suitable method to compile
 
-//#ifdef COMPILER2
+#if defined(COMPILER2) || defined(GRAAL)
 const char* StackWalkCompPolicy::_msg = NULL;
 
 
 // Consider m for compilation
-void StackWalkCompPolicy::method_invocation_event(methodHandle m, TRAPS) {
+void StackWalkCompPolicy::method_invocation_event(methodHandle m, JavaThread* thread) {
   int hot_count = m->invocation_count();
   reset_counter_for_invocation_event(m);
   const char* comment = "count";
 
   if (is_compilation_enabled() && m->code() == NULL && can_be_compiled(m)) {
-    ResourceMark rm(THREAD);
-    JavaThread *thread = (JavaThread*)THREAD;
+    ResourceMark rm(thread);
     frame       fr     = thread->last_frame();
     assert(fr.is_interpreted_frame(), "must be interpreted");
     assert(fr.interpreter_frame_method() == m(), "bad method");
@@ -461,17 +458,17 @@
       assert(top != NULL, "findTopInlinableFrame returned null");
       if (TraceCompilationPolicy) top->print();
       CompileBroker::compile_method(top->top_method(), InvocationEntryBci, CompLevel_highest_tier,
-                                    m, hot_count, comment, CHECK);
+                                    m, hot_count, comment, thread);
     }
   }
 }
 
-void StackWalkCompPolicy::method_back_branch_event(methodHandle m, int bci, TRAPS) {
+void StackWalkCompPolicy::method_back_branch_event(methodHandle m, int bci, JavaThread* thread) {
   int hot_count = m->backedge_count();
   const char* comment = "backedge_count";
 
   if (is_compilation_enabled() && !m->is_not_osr_compilable() && can_be_compiled(m)) {
-    CompileBroker::compile_method(m, bci, CompLevel_highest_tier, m, hot_count, comment, CHECK);
+    CompileBroker::compile_method(m, bci, CompLevel_highest_tier, m, hot_count, comment, thread);
 
     NOT_PRODUCT(trace_osr_completion(m->lookup_osr_nmethod_for(bci, CompLevel_highest_tier, true));)
   }
@@ -649,4 +646,4 @@
 
 
 
-//#endif // COMPILER2
+#endif // COMPILER2 || GRAAL
--- a/src/share/vm/runtime/compilationPolicy.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/compilationPolicy.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -64,7 +64,7 @@
   virtual int compiler_count(CompLevel comp_level) = 0;
   // main notification entry, return a pointer to an nmethod if the OSR is required,
   // returns NULL otherwise.
-  virtual nmethod* event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS) = 0;
+  virtual nmethod* event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread) = 0;
   // safepoint() is called at the end of the safepoint
   virtual void do_safepoint_work() = 0;
   // reprofile request
@@ -105,15 +105,15 @@
   virtual bool is_mature(methodOop method);
   virtual void initialize();
   virtual CompileTask* select_task(CompileQueue* compile_queue);
-  virtual nmethod* event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS);
-  virtual void method_invocation_event(methodHandle m, TRAPS) = 0;
-  virtual void method_back_branch_event(methodHandle m, int bci, TRAPS) = 0;
+  virtual nmethod* event(methodHandle method, methodHandle inlinee, int branch_bci, int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread);
+  virtual void method_invocation_event(methodHandle m, JavaThread* thread) = 0;
+  virtual void method_back_branch_event(methodHandle m, int bci, JavaThread* thread) = 0;
 };
 
 class SimpleCompPolicy : public NonTieredCompPolicy {
  public:
-  virtual void method_invocation_event(methodHandle m, TRAPS);
-  virtual void method_back_branch_event(methodHandle m, int bci, TRAPS);
+  virtual void method_invocation_event(methodHandle m, JavaThread* thread);
+  virtual void method_back_branch_event(methodHandle m, int bci, JavaThread* thread);
 };
 
 // StackWalkCompPolicy - existing C2 policy
@@ -121,8 +121,8 @@
 //#ifdef COMPILER2
 class StackWalkCompPolicy : public NonTieredCompPolicy {
  public:
-  virtual void method_invocation_event(methodHandle m, TRAPS);
-  virtual void method_back_branch_event(methodHandle m, int bci, TRAPS);
+  virtual void method_invocation_event(methodHandle m, JavaThread* thread);
+  virtual void method_back_branch_event(methodHandle m, int bci, JavaThread* thread);
 
  private:
   RFrame* findTopInlinableFrame(GrowableArray<RFrame*>* stack);
--- a/src/share/vm/runtime/deoptimization.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/deoptimization.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -19,6 +19,7 @@
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
+ *
  */
 
 #include "precompiled.hpp"
@@ -166,6 +167,7 @@
     tty->print("Deoptimization "); 
   }
   thread->inc_in_deopt_handler();
+
   return fetch_unroll_info_helper(thread);
 JRT_END
 
@@ -209,12 +211,11 @@
   assert(vf->is_compiled_frame(), "Wrong frame type");
   chunk->push(compiledVFrame::cast(vf));
 
-  // TODO(tw): Fix this hack after introducing GRAAL macro.
 #if defined(COMPILER2) || defined(GRAAL)
   // Reallocate the non-escaping objects and restore their fields. Then
   // relock objects if synchronization on them was eliminated.
 #ifdef COMPILER2
-  if (DoEscapeAnalysis) {
+  if (DoEscapeAnalysis || EliminateNestedLocks) {
     if (EliminateAllocations) {
 #endif // COMPILER2
       assert (chunk->at(0)->scope() != NULL,"expect only compiled java frames");
@@ -348,7 +349,6 @@
 
 #ifdef ASSERT
   assert(cb->is_deoptimization_stub() || cb->is_uncommon_trap_stub(), "just checking");
-  Events::log("fetch unroll sp " INTPTR_FORMAT, unpack_sp);
 #endif
 #else
   intptr_t* unpack_sp = stub_frame.sender(&dummy_map).unextended_sp();
@@ -586,6 +586,8 @@
     tty->print_cr("DEOPT UNPACKING thread " INTPTR_FORMAT " vframeArray " INTPTR_FORMAT " mode %d", thread, array, exec_mode);
   }
 #endif
+  Events::log(thread, "DEOPT UNPACKING pc=" INTPTR_FORMAT " sp=" INTPTR_FORMAT " mode %d",
+              stub_frame.pc(), stub_frame.sp(), exec_mode);
 
   UnrollBlock* info = array->unroll_block();
 
@@ -745,7 +747,7 @@
 }
 
 
-//#ifdef COMPILER2
+#if defined(COMPILER2) || defined(GRAAL)
 bool Deoptimization::realloc_objects(JavaThread* thread, frame* fr, GrowableArray<ScopeValue*>* objects, TRAPS) {
   Handle pending_exception(thread->pending_exception());
   const char* exception_file = thread->exception_file();
@@ -990,9 +992,10 @@
   }
 }
 #endif
-//#endif // COMPILER2
+#endif // COMPILER2 || GRAAL
 
 vframeArray* Deoptimization::create_vframeArray(JavaThread* thread, frame fr, RegisterMap *reg_map, GrowableArray<compiledVFrame*>* chunk) {
+  Events::log(thread, "DEOPT PACKING pc=" INTPTR_FORMAT " sp=" INTPTR_FORMAT, fr.pc(), fr.sp());
 
 #ifndef PRODUCT
   if (PrintDeoptimizationDetails) {
@@ -1038,7 +1041,6 @@
 
   // Compare the vframeArray to the collected vframes
   assert(array->structural_compare(thread, chunk), "just checking");
-  Events::log("# vframes = %d", (intptr_t)chunk->length());
 
 #ifndef PRODUCT
   if (PrintDeoptimizationDetails) {
@@ -1136,8 +1138,6 @@
 
   gather_statistics(Reason_constraint, Action_none, Bytecodes::_illegal);
 
-  EventMark m("Deoptimization (pc=" INTPTR_FORMAT ", sp=" INTPTR_FORMAT ")", fr.pc(), fr.id());
-
   // Patch the nmethod so that when execution returns to it we will
   // deopt the execution state and return to the interpreter.
   fr.deoptimize(thread);
@@ -1191,6 +1191,7 @@
 JRT_END
 
 
+#if defined(COMPILER2) || defined(SHARK) || defined(GRAAL)
 void Deoptimization::load_class_by_index(constantPoolHandle constant_pool, int index, TRAPS) {
   // in case of an unresolved klass entry, load the class.
   if (constant_pool->tag_at(index).is_unresolved_klass()) {
@@ -1250,6 +1251,10 @@
   // before we are done with it.
   nmethodLocker nl(fr.pc());
 
+  // Log a message
+  Events::log_deopt_message(thread, "Uncommon trap %d fr.pc " INTPTR_FORMAT,
+                            trap_request, fr.pc());
+
   {
     ResourceMark rm;
 
@@ -1260,7 +1265,6 @@
     DeoptAction action = trap_request_action(trap_request);
     jint unloaded_class_index = trap_request_index(trap_request); // CP idx or -1
 
-    Events::log("Uncommon trap occurred @" INTPTR_FORMAT " unloaded_class_index = %d", fr.pc(), (int) trap_request);
     vframe*  vf  = vframe::new_vframe(&fr, &reg_map, thread);
     compiledVFrame* cvf = compiledVFrame::cast(vf);
 
@@ -1962,3 +1966,40 @@
     if (xtty != NULL)  xtty->tail("statistics");
   }
 }
+#else // COMPILER2 || SHARK || GRAAL
+
+
+// Stubs for C1 only system.
+bool Deoptimization::trap_state_is_recompiled(int trap_state) {
+  return false;
+}
+
+const char* Deoptimization::trap_reason_name(int reason) {
+  return "unknown";
+}
+
+void Deoptimization::print_statistics() {
+  // no output
+}
+
+void
+Deoptimization::update_method_data_from_interpreter(methodDataHandle trap_mdo, int trap_bci, int reason) {
+  // no update
+}
+
+int Deoptimization::trap_state_has_reason(int trap_state, int reason) {
+  return 0;
+}
+
+void Deoptimization::gather_statistics(DeoptReason reason, DeoptAction action,
+                                       Bytecodes::Code bc) {
+  // no update
+}
+
+const char* Deoptimization::format_trap_state(char* buf, size_t buflen,
+                                              int trap_state) {
+  jio_snprintf(buf, buflen, "#%d", trap_state);
+  return buf;
+}
+
+#endif // COMPILER2 || SHARK || GRAAL
--- a/src/share/vm/runtime/dtraceJSDT.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/dtraceJSDT.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -82,7 +82,7 @@
 
   int handle = pd_activate((void*)probes,
     module_name, providers_count, providers);
-  if (handle <= 0) {
+  if (handle < 0) {
     delete probes;
     THROW_MSG_0(vmSymbols::java_lang_RuntimeException(),
       "Unable to register DTrace probes (internal error).");
--- a/src/share/vm/runtime/frame.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/frame.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -570,7 +570,7 @@
     InterpreterCodelet* desc = Interpreter::codelet_containing(pc());
     if (desc != NULL) {
       st->print("~");
-      desc->print();
+      desc->print_on(st);
       NOT_PRODUCT(begin = desc->code_begin(); end = desc->code_end();)
     } else {
       st->print("~interpreter");
@@ -1336,7 +1336,6 @@
 }
 #endif
 
-
 #ifdef ASSERT
 void frame::interpreter_frame_verify_monitor(BasicObjectLock* value) const {
   assert(is_interpreted_frame(), "Not an interpreted frame");
@@ -1352,27 +1351,35 @@
   guarantee((current - low_mark) % monitor_size  ==  0         , "Misaligned bottom of BasicObjectLock*");
   guarantee( current >= low_mark                               , "Current BasicObjectLock* below than low_mark");
 }
+#endif
 
+#ifndef PRODUCT
+void frame::describe(FrameValues& values, int frame_no) {
+  // boundaries: sp and the 'real' frame pointer
+  values.describe(-1, sp(), err_msg("sp for #%d", frame_no), 1);
+  intptr_t* frame_pointer = real_fp(); // Note: may differ from fp()
 
-void frame::describe(FrameValues& values, int frame_no) {
+  // print frame info at the highest boundary
+  intptr_t* info_address = MAX2(sp(), frame_pointer);
+
+  if (info_address != frame_pointer) {
+    // print frame_pointer explicitly if not marked by the frame info
+    values.describe(-1, frame_pointer, err_msg("frame pointer for #%d", frame_no), 1);
+  }
+
   if (is_entry_frame() || is_compiled_frame() || is_interpreted_frame() || is_native_frame()) {
     // Label values common to most frames
     values.describe(-1, unextended_sp(), err_msg("unextended_sp for #%d", frame_no));
-    values.describe(-1, sp(), err_msg("sp for #%d", frame_no));
-    if (is_compiled_frame()) {
-      values.describe(-1, sp() + _cb->frame_size(), err_msg("computed fp for #%d", frame_no));
-    } else {
-      values.describe(-1, fp(), err_msg("fp for #%d", frame_no));
-    }
   }
+
   if (is_interpreted_frame()) {
     methodOop m = interpreter_frame_method();
     int bci = interpreter_frame_bci();
 
     // Label the method and current bci
-    values.describe(-1, MAX2(sp(), fp()),
+    values.describe(-1, info_address,
                     FormatBuffer<1024>("#%d method %s @ %d", frame_no, m->name_and_sig_as_C_string(), bci), 2);
-    values.describe(-1, MAX2(sp(), fp()),
+    values.describe(-1, info_address,
                     err_msg("- %d locals %d max stack", m->max_locals(), m->max_stack()), 1);
     if (m->max_locals() > 0) {
       intptr_t* l0 = interpreter_frame_local_at(0);
@@ -1404,21 +1411,36 @@
     }
   } else if (is_entry_frame()) {
     // For now just label the frame
-    values.describe(-1, MAX2(sp(), fp()), err_msg("#%d entry frame", frame_no), 2);
+    values.describe(-1, info_address, err_msg("#%d entry frame", frame_no), 2);
   } else if (is_compiled_frame()) {
     // For now just label the frame
     nmethod* nm = cb()->as_nmethod_or_null();
-    values.describe(-1, MAX2(sp(), fp()),
+    values.describe(-1, info_address,
                     FormatBuffer<1024>("#%d nmethod " INTPTR_FORMAT " for method %s%s", frame_no,
                                        nm, nm->method()->name_and_sig_as_C_string(),
-                                       is_deoptimized_frame() ? " (deoptimized" : ""), 2);
+                                       (_deopt_state == is_deoptimized) ?
+                                       " (deoptimized)" :
+                                       ((_deopt_state == unknown) ? " (state unknown)" : "")),
+                    2);
   } else if (is_native_frame()) {
     // For now just label the frame
     nmethod* nm = cb()->as_nmethod_or_null();
-    values.describe(-1, MAX2(sp(), fp()),
+    values.describe(-1, info_address,
                     FormatBuffer<1024>("#%d nmethod " INTPTR_FORMAT " for native method %s", frame_no,
                                        nm, nm->method()->name_and_sig_as_C_string()), 2);
+  } else if (is_ricochet_frame()) {
+      values.describe(-1, info_address, err_msg("#%d ricochet frame", frame_no), 2);
+  } else {
+    // provide default info if not handled before
+    char *info = (char *) "special frame";
+    if ((_cb != NULL) &&
+        (_cb->name() != NULL)) {
+      info = (char *)_cb->name();
+    }
+    values.describe(-1, info_address, err_msg("#%d <%s>", frame_no, info), 2);
   }
+
+  // platform dependent additional data
   describe_pd(values, frame_no);
 }
 
@@ -1435,7 +1457,7 @@
 }
 
 
-#ifdef ASSERT
+#ifndef PRODUCT
 
 void FrameValues::describe(int owner, intptr_t* location, const char* description, int priority) {
   FrameValue fv;
@@ -1448,6 +1470,7 @@
 }
 
 
+#ifdef ASSERT
 void FrameValues::validate() {
   _values.sort(compare);
   bool error = false;
@@ -1473,7 +1496,7 @@
   }
   assert(!error, "invalid layout");
 }
-
+#endif // ASSERT
 
 void FrameValues::print(JavaThread* thread) {
   _values.sort(compare);
@@ -1522,4 +1545,4 @@
   }
 }
 
-#endif
+#endif // ndef PRODUCT
--- a/src/share/vm/runtime/frame.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/frame.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -222,6 +222,15 @@
   // returns the stack pointer of the calling frame
   intptr_t* sender_sp() const;
 
+  // Returns the real 'frame pointer' for the current frame.
+  // This is the value expected by the platform ABI when it defines a
+  // frame pointer register. It may differ from the effective value of
+  // the FP register when that register is used in the JVM for other
+  // purposes (like compiled frames on some platforms).
+  // On other platforms, it is defined so that the stack area used by
+  // this frame goes from real_fp() to sp().
+  intptr_t* real_fp() const;
+
   // Deoptimization info, if needed (platform dependent).
   // Stored in the initial_info field of the unroll info, to be used by
   // the platform dependent deoptimization blobs.
@@ -487,7 +496,7 @@
 
 };
 
-#ifdef ASSERT
+#ifndef PRODUCT
 // A simple class to describe a location on the stack
 class FrameValue VALUE_OBJ_CLASS_SPEC {
  public:
@@ -517,7 +526,9 @@
   // Used by frame functions to describe locations.
   void describe(int owner, intptr_t* location, const char* description, int priority = 0);
 
+#ifdef ASSERT
   void validate();
+#endif
   void print(JavaThread* thread);
 };
 
--- a/src/share/vm/runtime/globals.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/globals.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -82,16 +82,19 @@
 }
 
 bool Flag::is_writeable() const {
-  return (strcmp(kind, "{manageable}") == 0 || strcmp(kind, "{product rw}") == 0);
+  return strcmp(kind, "{manageable}") == 0 ||
+         strcmp(kind, "{product rw}") == 0 ||
+         is_writeable_ext();
 }
 
-// All flags except "manageable" are assumed internal flags.
+// All flags except "manageable" are assumed to be internal flags.
 // Long term, we need to define a mechanism to specify which flags
 // are external/stable and change this function accordingly.
 bool Flag::is_external() const {
-  return (strcmp(kind, "{manageable}") == 0);
+  return strcmp(kind, "{manageable}") == 0 || is_external_ext();
 }
 
+
 // Length of format string (e.g. "%.1234s") for printing ccstr below
 #define FORMAT_BUFFER_LEN 16
 
@@ -485,7 +488,7 @@
   }
 }
 
-void CommandLineFlags::printSetFlags() {
+void CommandLineFlags::printSetFlags(outputStream* out) {
   // Print which flags were set on the command line
   // note: this method is called before the thread structure is in place
   //       which means resource allocation cannot be used.
@@ -504,11 +507,11 @@
   // Print
   for (int i = 0; i < length; i++) {
     if (array[i]->origin /* naked field! */) {
-      array[i]->print_as_flag(tty);
-      tty->print(" ");
+      array[i]->print_as_flag(out);
+      out->print(" ");
     }
   }
-  tty->cr();
+  out->cr();
   FREE_C_HEAP_ARRAY(Flag*, array);
 }
 
@@ -521,7 +524,7 @@
 
 #endif // PRODUCT
 
-void CommandLineFlags::printFlags(bool withComments) {
+void CommandLineFlags::printFlags(outputStream* out, bool withComments) {
   // Print the flags sorted by name
   // note: this method is called before the thread structure is in place
   //       which means resource allocation cannot be used.
@@ -538,10 +541,10 @@
   qsort(array, length, sizeof(Flag*), compare_flags);
 
   // Print
-  tty->print_cr("[Global flags]");
+  out->print_cr("[Global flags]");
   for (int i = 0; i < length; i++) {
     if (array[i]->is_unlocked()) {
-      array[i]->print_on(tty, withComments);
+      array[i]->print_on(out, withComments);
     }
   }
   FREE_C_HEAP_ARRAY(Flag*, array);
--- a/src/share/vm/runtime/globals.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/globals.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,17 @@
 #define SHARE_VM_RUNTIME_GLOBALS_HPP
 
 #include "utilities/debug.hpp"
+
+// use this for flags that are true per default in the tiered build
+// but false in non-tiered builds, and vice versa
+#ifdef TIERED
+#define  trueInTiered true
+#define falseInTiered false
+#else
+#define  trueInTiered false
+#define falseInTiered true
+#endif
+
 #ifdef TARGET_ARCH_x86
 # include "globals_x86.hpp"
 #endif
@@ -245,6 +256,8 @@
 
   bool is_unlocker_ext() const;
   bool is_unlocked_ext() const;
+  bool is_writeable_ext() const;
+  bool is_external_ext() const;
 
   void print_on(outputStream* st, bool withComments = false );
   void print_as_flag(outputStream* st);
@@ -324,9 +337,9 @@
 
   // Returns false if name is not a command line flag.
   static bool wasSetOnCmdline(const char* name, bool* value);
-  static void printSetFlags();
+  static void printSetFlags(outputStream* out);
 
-  static void printFlags(bool withComments = false );
+  static void printFlags(outputStream* out, bool withComments);
 
   static void verify() PRODUCT_RETURN;
 };
@@ -351,16 +364,6 @@
 #define falseInProduct true
 #endif
 
-// use this for flags that are true per default in the tiered build
-// but false in non-tiered builds, and vice versa
-#ifdef TIERED
-#define  trueInTiered true
-#define falseInTiered false
-#else
-#define  trueInTiered false
-#define falseInTiered true
-#endif
-
 #ifdef JAVASE_EMBEDDED
 #define falseInEmbedded false
 #else
@@ -525,6 +528,9 @@
   product(intx, UseSSE, 99,                                                 \
           "Highest supported SSE instructions set on x86/x64")              \
                                                                             \
+  product(intx, UseAVX, 99,                                                 \
+          "Highest supported AVX instructions set on x86/x64")              \
+                                                                            \
   product(intx, UseVIS, 99,                                                 \
           "Highest supported VIS instructions set on Sparc")                \
                                                                             \
@@ -653,6 +659,12 @@
   develop(bool, SpecialArraysEquals, true,                                  \
           "special version of Arrays.equals(char[],char[])")                \
                                                                             \
+  product(bool, CriticalJNINatives, true,                                   \
+          "check for critical JNI entry points")                            \
+                                                                            \
+  notproduct(bool, StressCriticalJNINatives, false,                         \
+            "Exercise register saving code in critical natives")            \
+                                                                            \
   product(bool, UseSSE42Intrinsics, false,                                  \
           "SSE4.2 versions of intrinsics")                                  \
                                                                             \
@@ -730,8 +742,11 @@
   product(bool, MaxFDLimit, true,                                           \
           "Bump the number of file descriptors to max in solaris.")         \
                                                                             \
-  notproduct(bool, LogEvents, trueInDebug,                                  \
-          "Enable Event log")                                               \
+  diagnostic(bool, LogEvents, true,                                         \
+             "Enable the various ring buffer event logs")                   \
+                                                                            \
+  diagnostic(intx, LogEventsBufferEntries, 10,                              \
+             "Number of entries in the ring buffer event logs")             \
                                                                             \
   product(bool, BytecodeVerificationRemote, true,                           \
           "Enables the Java bytecode verifier for remote classes")          \
@@ -1040,6 +1055,9 @@
   notproduct(bool, PrintSystemDictionaryAtExit, false,                      \
           "Prints the system dictionary at exit")                           \
                                                                             \
+  experimental(intx, PredictedLoadedClassCount, 0,                          \
+          "Experimental: Tune loaded class cache starting size.")           \
+                                                                            \
   diagnostic(bool, UnsyncloadClass, false,                                  \
           "Unstable: VM calls loadClass unsynchronized. Custom "            \
           "class loader  must call VM synchronized for findClass "          \
@@ -1554,7 +1572,7 @@
   product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
           "The desired number of objects to claim from the overflow list")  \
                                                                             \
-  diagnostic(intx, ParGCStridesPerThread, 2,                                \
+  diagnostic(uintx, ParGCStridesPerThread, 2,                               \
           "The number of strides per worker thread that we divide up the "  \
           "card table scanning work into")                                  \
                                                                             \
@@ -3001,7 +3019,7 @@
   product(intx, SafepointTimeoutDelay, 10000,                               \
           "Delay in milliseconds for option SafepointTimeout")              \
                                                                             \
-  product(intx, NmethodSweepFraction, 4,                                    \
+  product(intx, NmethodSweepFraction, 16,                                    \
           "Number of invocations of sweeper to cover all nmethods")         \
                                                                             \
   product(intx, NmethodSweepCheckInterval, 5,                               \
@@ -3478,16 +3496,19 @@
           "    Linux this policy requires root privilege.")                 \
                                                                             \
   product(bool, ThreadPriorityVerbose, false,                               \
-          "print priority changes")                                         \
+          "Print priority changes")                                         \
                                                                             \
   product(intx, DefaultThreadPriority, -1,                                  \
-          "what native priority threads run at if not specified elsewhere (-1 means no change)") \
+          "The native priority at which threads run if not elsewhere "      \
+          "specified (-1 means no change)")                                 \
                                                                             \
   product(intx, CompilerThreadPriority, -1,                                 \
-          "what priority should compiler threads run at (-1 means no change)") \
+          "The native priority at which compiler threads should run "       \
+          "(-1 means no change)")                                           \
                                                                             \
   product(intx, VMThreadPriority, -1,                                       \
-          "what priority should VM threads run at (-1 means no change)")    \
+          "The native priority at which the VM thread should run "          \
+          "(-1 means no change)")                                           \
                                                                             \
   product(bool, CompilerThreadHintNoPreempt, true,                          \
           "(Solaris only) Give compiler threads an extra quanta")           \
@@ -3506,6 +3527,15 @@
   product(intx, JavaPriority9_To_OSPriority, -1, "Map Java priorities to OS priorities") \
   product(intx, JavaPriority10_To_OSPriority,-1, "Map Java priorities to OS priorities") \
                                                                             \
+  experimental(bool, UseCriticalJavaThreadPriority, false,                  \
+          "Java thread priority 10 maps to critical scheduling priority")   \
+                                                                            \
+  experimental(bool, UseCriticalCompilerThreadPriority, false,              \
+          "Compiler thread(s) run at critical scheduling priority")         \
+                                                                            \
+  experimental(bool, UseCriticalCMSThreadPriority, false,                   \
+          "ConcurrentMarkSweep thread runs at critical scheduling priority")\
+                                                                            \
   /* compiler debugging */                                                  \
   notproduct(intx, CompileTheWorldStartAt,     1,                           \
           "First class to consider when using +CompileTheWorld")            \
@@ -3575,7 +3605,7 @@
           "Threshold at which tier 3 compilation is invoked (invocation "   \
           "minimum must be satisfied.")                                     \
                                                                             \
-  product(intx, Tier3BackEdgeThreshold,  7000,                              \
+  product(intx, Tier3BackEdgeThreshold,  60000,                             \
           "Back edge threshold at which tier 3 OSR compilation is invoked") \
                                                                             \
   product(intx, Tier4InvocationThreshold, 5000,                             \
@@ -3827,10 +3857,6 @@
   develop(bool, StressMethodHandleWalk, false,                              \
           "Process all method handles with MethodHandleWalk")               \
                                                                             \
-  diagnostic(bool, UseRicochetFrames, true,                                 \
-          "use ricochet stack frames for method handle combination, "       \
-          "if the platform supports them")                                  \
-                                                                            \
   experimental(bool, TrustFinalNonStaticFields, false,                      \
           "trust final non-static declarations for constant folding")       \
                                                                             \
@@ -3876,7 +3902,7 @@
   product(bool, UseVMInterruptibleIO, false,                                \
           "(Unstable, Solaris-specific) Thread interrupt before or with "   \
           "EINTR for I/O operations results in OS_INTRPT. The default value"\
-          " of this flag is true for JDK 6 and earliers")
+          " of this flag is true for JDK 6 and earlier")
 
 /*
  *  Macros for factoring of globals
--- a/src/share/vm/runtime/globals_ext.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/globals_ext.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -53,4 +53,12 @@
   return true;
 }
 
+inline bool Flag::is_writeable_ext() const {
+  return false;
+}
+
+inline bool Flag::is_external_ext() const {
+  return false;
+}
+
 #endif // SHARE_VM_RUNTIME_GLOBALS_EXT_HPP
--- a/src/share/vm/runtime/init.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/init.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,15 +47,16 @@
 void classLoader_init();
 void codeCache_init();
 void VM_Version_init();
+void os_init_globals();        // depends on VM_Version_init, before universe_init
 void stubRoutines_init1();
-jint universe_init();  // dependent on codeCache_init and stubRoutines_init
-void interpreter_init();  // before any methods loaded
-void invocationCounter_init();  // before any methods loaded
+jint universe_init();          // depends on codeCache_init and stubRoutines_init
+void interpreter_init();       // before any methods loaded
+void invocationCounter_init(); // before any methods loaded
 void marksweep_init();
 void accessFlags_init();
 void templateTable_init();
 void InterfaceSupport_init();
-void universe2_init();  // dependent on codeCache_init and stubRoutines_init
+void universe2_init();  // dependent on codeCache_init and stubRoutines_init, loads primordial classes
 void referenceProcessor_init();
 void jni_handles_init();
 void vmStructs_init();
@@ -64,7 +65,7 @@
 void InlineCacheBuffer_init();
 void compilerOracle_init();
 void compilationPolicy_init();
-
+void compileBroker_init();
 
 // Initialization after compiler initialization
 bool universe_post_init();  // must happen after compiler_init
@@ -94,8 +95,10 @@
   classLoader_init();
   codeCache_init();
   VM_Version_init();
+  os_init_globals();
   stubRoutines_init1();
-  jint status = universe_init();  // dependent on codeCache_init and stubRoutines_init
+  jint status = universe_init();  // dependent on codeCache_init and
+                                  // stubRoutines_init1
   if (status != JNI_OK)
     return status;
 
@@ -106,7 +109,7 @@
   templateTable_init();
   InterfaceSupport_init();
   SharedRuntime::generate_stubs();
-  universe2_init();  // dependent on codeCache_init and stubRoutines_init
+  universe2_init();  // dependent on codeCache_init and stubRoutines_init1
   referenceProcessor_init();
   jni_handles_init();
 #ifndef VM_STRUCTS_KERNEL
@@ -117,12 +120,13 @@
   InlineCacheBuffer_init();
   compilerOracle_init();
   compilationPolicy_init();
+  compileBroker_init();
   VMRegImpl::set_regName();
 
   if (!universe_post_init()) {
     return JNI_ERR;
   }
-  javaClasses_init();  // must happen after vtable initialization
+  javaClasses_init();   // must happen after vtable initialization
   stubRoutines_init2(); // note: StubRoutines need 2-phase init
 
   // Although we'd like to, we can't easily do a heap verify
@@ -137,7 +141,7 @@
   // All the flags that get adjusted by VM_Version_init and os::init_2
   // have been set so dump the flags now.
   if (PrintFlagsFinal) {
-    CommandLineFlags::printFlags();
+    CommandLineFlags::printFlags(tty, false);
   }
 
   return JNI_OK;
--- a/src/share/vm/runtime/java.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/java.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -60,6 +60,8 @@
 #include "runtime/task.hpp"
 #include "runtime/timer.hpp"
 #include "runtime/vm_operations.hpp"
+#include "trace/tracing.hpp"
+#include "trace/traceEventTypes.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/histogram.hpp"
@@ -515,6 +517,11 @@
   if (JvmtiExport::should_post_thread_life()) {
     JvmtiExport::post_thread_end(thread);
   }
+
+  EVENT_BEGIN(TraceEventThreadEnd, event);
+  EVENT_COMMIT(event,
+      EVENT_SET(event, javalangthread, java_lang_Thread::thread_id(thread->threadObj())));
+
   // Always call even when there are not JVMTI environments yet, since environments
   // may be attached late and JVMTI must track phases of VM execution
   JvmtiExport::post_vm_death();
--- a/src/share/vm/runtime/javaCalls.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/javaCalls.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -61,8 +61,7 @@
 
   guarantee(thread->is_Java_thread(), "crucial check - the VM thread cannot and must not escape to Java code");
   assert(!thread->owns_locks(), "must release all locks when leaving VM");
-  // (tw) may we do this?
-  // guarantee(!thread->is_Compiler_thread(), "cannot make java calls from the compiler");
+  guarantee(!thread->is_Compiler_thread(), "cannot make java calls from the compiler");
   _result   = result;
 
   // Allocate handle block for Java code. This must be done before we change thread_state to _thread_in_Java_or_stub,
@@ -373,8 +372,7 @@
 #endif
 
 
-  // (tw) may we do this?
-  //assert(!thread->is_Compiler_thread(), "cannot compile from the compiler");
+  assert(!thread->is_Compiler_thread(), "cannot compile from the compiler");
   if (CompilationPolicy::must_be_compiled(method)) {
     CompileBroker::compile_method(method, InvocationEntryBci,
                                   CompilationPolicy::policy()->initial_compile_level(),
--- a/src/share/vm/runtime/mutex.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/mutex.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,6 +1,6 @@
 
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -527,7 +527,21 @@
 
 void Monitor::IUnlock (bool RelaxAssert) {
   assert (ILocked(), "invariant") ;
-  _LockWord.Bytes[_LSBINDEX] = 0 ;       // drop outer lock
+  // Conceptually we need a MEMBAR #storestore|#loadstore barrier or fence immediately
+  // before the store that releases the lock.  Crucially, all the stores and loads in the
+  // critical section must be globally visible before the store of 0 into the lock-word
+  // that releases the lock becomes globally visible.  That is, memory accesses in the
+  // critical section should not be allowed to bypass or overtake the following ST that
+  // releases the lock.  As such, to prevent accesses within the critical section
+  // from "leaking" out, we need a release fence between the critical section and the
+  // store that releases the lock.  In practice that release barrier is elided on
+  // platforms with strong memory models such as TSO.
+  //
+  // Note that the OrderAccess::storeload() fence that appears after the unlock store
+  // provides for progress conditions and succession and is _not_ related to exclusion
+  // safety or lock release consistency.
+  OrderAccess::release_store(&_LockWord.Bytes[_LSBINDEX], 0); // drop outer lock
+
   OrderAccess::storeload ();
   ParkEvent * const w = _OnDeck ;
   assert (RelaxAssert || w != Thread::current()->_MutexEvent, "invariant") ;
@@ -1282,10 +1296,6 @@
 
       assert(this->rank() >= 0, "bad lock rank");
 
-      if (LogMultipleMutexLocking && locks != NULL) {
-        Events::log("thread " INTPTR_FORMAT " locks %s, already owns %s", new_owner, name(), locks->name());
-      }
-
       // Deadlock avoidance rules require us to acquire Mutexes only in
       // a global total order. For example m1 is the lowest ranked mutex
       // that the thread holds and m2 is the mutex the thread is trying
@@ -1329,10 +1339,6 @@
     #ifdef ASSERT
       Monitor *locks = old_owner->owned_locks();
 
-      if (LogMultipleMutexLocking && locks != this) {
-        Events::log("thread " INTPTR_FORMAT " unlocks %s, still owns %s", old_owner, this->name(), locks->name());
-      }
-
       // remove "this" from the owned locks list
 
       Monitor *prev = NULL;
--- a/src/share/vm/runtime/mutexLocker.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/mutexLocker.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -126,13 +126,20 @@
 Mutex*   FreeList_lock                = NULL;
 Monitor* SecondaryFreeList_lock       = NULL;
 Mutex*   OldSets_lock                 = NULL;
+Monitor* RootRegionScan_lock          = NULL;
 Mutex*   MMUTracker_lock              = NULL;
 Mutex*   HotCardCache_lock            = NULL;
 
 Monitor* GCTaskManager_lock           = NULL;
 
 Mutex*   Management_lock              = NULL;
-Monitor* Service_lock               = NULL;
+Monitor* Service_lock                 = NULL;
+Mutex*   Stacktrace_lock              = NULL;
+
+Monitor* JfrQuery_lock                = NULL;
+Monitor* JfrMsg_lock                  = NULL;
+Mutex*   JfrBuffer_lock               = NULL;
+Mutex*   JfrStream_lock               = NULL;
 
 #define MAX_NUM_MUTEX 128
 static Monitor * _mutex_array[MAX_NUM_MUTEX];
@@ -193,6 +200,7 @@
     def(FreeList_lock              , Mutex,   leaf     ,   true );
     def(SecondaryFreeList_lock     , Monitor, leaf     ,   true );
     def(OldSets_lock               , Mutex  , leaf     ,   true );
+    def(RootRegionScan_lock        , Monitor, leaf     ,   true );
     def(MMUTracker_lock            , Mutex  , leaf     ,   true );
     def(HotCardCache_lock          , Mutex  , special  ,   true );
     def(EvacFailureStack_lock      , Mutex  , nonleaf  ,   true );
@@ -207,6 +215,7 @@
   def(Patching_lock                , Mutex  , special,     true ); // used for safepointing and code patching.
   def(ObjAllocPost_lock            , Monitor, special,     false);
   def(Service_lock                 , Monitor, special,     true ); // used for service thread operations
+  def(Stacktrace_lock              , Mutex,   special,     true ); // used for JFR stacktrace database
   def(JmethodIdCreation_lock       , Mutex  , leaf,        true ); // used for creating jmethodIDs.
 
   def(SystemDictionary_lock        , Monitor, leaf,        true ); // lookups done by VM thread
@@ -271,6 +280,11 @@
   def(Debug3_lock                  , Mutex  , nonleaf+4,   true );
   def(ProfileVM_lock               , Monitor, nonleaf+4,   false); // used for profiling of the VMThread
   def(CompileThread_lock           , Monitor, nonleaf+5,   false );
+
+  def(JfrQuery_lock                , Monitor, nonleaf,     true);  // JFR locks, keep these in consecutive order
+  def(JfrMsg_lock                  , Monitor, nonleaf+2,   true);
+  def(JfrBuffer_lock               , Mutex,   nonleaf+3,   true);
+  def(JfrStream_lock               , Mutex,   nonleaf+4,   true);
 }
 
 GCMutexLocker::GCMutexLocker(Monitor * mutex) {
--- a/src/share/vm/runtime/mutexLocker.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/mutexLocker.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -115,7 +115,7 @@
 
 #ifndef PRODUCT
 extern Mutex*   FullGCALot_lock;                 // a lock to make FullGCALot MT safe
-#endif
+#endif // PRODUCT
 extern Mutex*   Debug1_lock;                     // A bunch of pre-allocated locks that can be used for tracing
 extern Mutex*   Debug2_lock;                     // down synchronization related bugs!
 extern Mutex*   Debug3_lock;
@@ -129,12 +129,19 @@
 extern Mutex*   FreeList_lock;                   // protects the free region list during safepoints
 extern Monitor* SecondaryFreeList_lock;          // protects the secondary free region list
 extern Mutex*   OldSets_lock;                    // protects the old region sets
+extern Monitor* RootRegionScan_lock;             // used to notify that the CM threads have finished scanning the IM snapshot regions
 extern Mutex*   MMUTracker_lock;                 // protects the MMU
                                                  // tracker data structures
 extern Mutex*   HotCardCache_lock;               // protects the hot card cache
 
 extern Mutex*   Management_lock;                 // a lock used to serialize JVM management
 extern Monitor* Service_lock;                    // a lock used for service thread operation
+extern Mutex*   Stacktrace_lock;                 // used to guard access to the stacktrace table
+
+extern Monitor* JfrQuery_lock;                   // protects JFR use
+extern Monitor* JfrMsg_lock;                     // protects JFR messaging
+extern Mutex*   JfrBuffer_lock;                  // protects JFR buffer operations
+extern Mutex*   JfrStream_lock;                  // protects JFR stream access
 
 // A MutexLocker provides mutual exclusion with respect to a given mutex
 // for the scope which contains the locker.  The lock is an OS lock, not
--- a/src/share/vm/runtime/os.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/os.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -82,6 +82,12 @@
 julong os::free_bytes = 0;          // # of bytes freed
 #endif
 
+void os_init_globals() {
+  // Free-function wrapper that only forwards to os::init_globals(); called
+  // from init_globals(). See Threads::create_vm() in thread.cpp, and init.cpp.
+  os::init_globals();
+}
+
 // Fill in buffer with current local time as an ISO-8601 string.
 // E.g., yyyy-mm-ddThh:mm:ss-zzzz.
 // Returns buffer, or NULL if it failed.
@@ -1095,6 +1101,7 @@
         "%/lib/jsse.jar:"
         "%/lib/jce.jar:"
         "%/lib/charsets.jar:"
+        "%/lib/jfr.jar:"
 #ifdef __APPLE__
         "%/lib/JObjC.jar:"
 #endif
--- a/src/share/vm/runtime/os.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/os.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -73,8 +73,9 @@
   MinPriority      =  1,     // Minimum priority
   NormPriority     =  5,     // Normal (non-daemon) priority
   NearMaxPriority  =  9,     // High priority, used for VMThread
-  MaxPriority      = 10      // Highest priority, used for WatcherThread
+  MaxPriority      = 10,     // Highest priority, used for WatcherThread
                              // ensures that VMThread doesn't starve profiler
+  CriticalPriority = 11      // Critical thread priority
 };
 
 // Typedef for structured exception handling support
@@ -99,9 +100,11 @@
   }
 
  public:
-
   static void init(void);                      // Called before command line parsing
   static jint init_2(void);                    // Called after command line parsing
+  static void init_globals(void) {             // Called from init_globals() in init.cpp, via os_init_globals() in os.cpp
+    init_globals_ext();
+  }
   static void init_3(void);                    // Called at the end of vm init
 
   // File names are case-insensitive on windows only
@@ -256,7 +259,7 @@
                              char *addr, size_t bytes, bool read_only,
                              bool allow_exec);
   static bool   unmap_memory(char *addr, size_t bytes);
-  static void   free_memory(char *addr, size_t bytes);
+  static void   free_memory(char *addr, size_t bytes, size_t alignment_hint);
   static void   realign_memory(char *addr, size_t bytes, size_t alignment_hint);
 
   // NUMA-specific interface
@@ -500,6 +503,7 @@
 
   static void print_location(outputStream* st, intptr_t x, bool verbose = false);
   static size_t lasterror(char *buf, size_t len);
+  static int get_last_error();
 
   // Determines whether the calling process is being debugged by a user-mode debugger.
   static bool is_debugger_attached();
@@ -584,28 +588,28 @@
   static int socket(int domain, int type, int protocol);
   static int socket_close(int fd);
   static int socket_shutdown(int fd, int howto);
-  static int recv(int fd, char *buf, int nBytes, int flags);
-  static int send(int fd, char *buf, int nBytes, int flags);
-  static int raw_send(int fd, char *buf, int nBytes, int flags);
+  static int recv(int fd, char* buf, size_t nBytes, uint flags);
+  static int send(int fd, char* buf, size_t nBytes, uint flags);
+  static int raw_send(int fd, char* buf, size_t nBytes, uint flags);
   static int timeout(int fd, long timeout);
   static int listen(int fd, int count);
-  static int connect(int fd, struct sockaddr *him, int len);
-  static int bind(int fd, struct sockaddr *him, int len);
-  static int accept(int fd, struct sockaddr *him, int *len);
-  static int recvfrom(int fd, char *buf, int nbytes, int flags,
-                             struct sockaddr *from, int *fromlen);
-  static int get_sock_name(int fd, struct sockaddr *him, int *len);
-  static int sendto(int fd, char *buf, int len, int flags,
-                           struct sockaddr *to, int tolen);
-  static int socket_available(int fd, jint *pbytes);
+  static int connect(int fd, struct sockaddr* him, socklen_t len);
+  static int bind(int fd, struct sockaddr* him, socklen_t len);
+  static int accept(int fd, struct sockaddr* him, socklen_t* len);
+  static int recvfrom(int fd, char* buf, size_t nbytes, uint flags,
+                      struct sockaddr* from, socklen_t* fromlen);
+  static int get_sock_name(int fd, struct sockaddr* him, socklen_t* len);
+  static int sendto(int fd, char* buf, size_t len, uint flags,
+                    struct sockaddr* to, socklen_t tolen);
+  static int socket_available(int fd, jint* pbytes);
 
   static int get_sock_opt(int fd, int level, int optname,
-                           char *optval, int* optlen);
+                          char* optval, socklen_t* optlen);
   static int set_sock_opt(int fd, int level, int optname,
-                           const char *optval, int optlen);
+                          const char* optval, socklen_t optlen);
   static int get_host_name(char* name, int namelen);
 
-  static struct hostent*  get_host_by_name(char* name);
+  static struct hostent* get_host_by_name(char* name);
 
   // Printing 64 bit integers
   static const char* jlong_format_specifier();
@@ -671,6 +675,11 @@
   // rest of line is skipped. Returns number of bytes read or -1 on EOF
   static int get_line_chars(int fd, char *buf, const size_t bsize);
 
+  // Extensions
+#include "runtime/os_ext.hpp"
+
+ public:
+
   // Platform dependent stuff
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.hpp"
@@ -715,7 +724,7 @@
 # include "os_bsd_zero.hpp"
 #endif
 
-
+ public:
   // debugging support (mostly used by debug.cpp but also fatal error handler)
   static bool find(address pc, outputStream* st = tty); // OS specific function to make sense out of an address
 
@@ -725,7 +734,7 @@
   // Thread priority helpers (implemented in OS-specific part)
   static OSReturn set_native_priority(Thread* thread, int native_prio);
   static OSReturn get_native_priority(const Thread* const thread, int* priority_ptr);
-  static int java_to_os_priority[MaxPriority + 1];
+  static int java_to_os_priority[CriticalPriority + 1];
   // Hint to the underlying OS that a task switch would not be good.
   // Void return because it's a hint and can fail.
   static void hint_no_preempt();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/os_ext.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2011 Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_OS_EXT_HPP
+#define SHARE_VM_RUNTIME_OS_EXT_HPP
+
+ public:
+  static void init_globals_ext() {} // Empty hook; os::init_globals() calls it.
+                                    // See os.hpp/cpp and init.cpp.
+
+ private:
+
+#endif // SHARE_VM_RUNTIME_OS_EXT_HPP
--- a/src/share/vm/runtime/rframe.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/rframe.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -152,11 +152,11 @@
 
 void RFrame::print(const char* kind) {
 #ifndef PRODUCT
-//#ifdef COMPILER2
+#if defined(COMPILER2) || defined(GRAAL)
   int cnt = top_method()->interpreter_invocation_count();
-//#else
-//  int cnt = top_method()->invocation_count();
-//#endif
+#else
+  int cnt = top_method()->invocation_count();
+#endif
   tty->print("%3d %s ", _num, is_interpreted() ? "I" : "C");
   top_method()->print_short_name(tty);
   tty->print_cr(": inv=%5d(%d) cst=%4d", _invocations, cnt, cost());
--- a/src/share/vm/runtime/safepoint.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/safepoint.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -95,6 +95,7 @@
 SafepointSynchronize::SynchronizeState volatile SafepointSynchronize::_state = SafepointSynchronize::_not_synchronized;
 volatile int  SafepointSynchronize::_waiting_to_block = 0;
 volatile int SafepointSynchronize::_safepoint_counter = 0;
+int SafepointSynchronize::_current_jni_active_count = 0;
 long  SafepointSynchronize::_end_of_last_safepoint = 0;
 static volatile int PageArmed = 0 ;        // safepoint polling page is RO|RW vs PROT_NONE
 static volatile int TryingToBlock = 0 ;    // proximate value -- for advisory use only
@@ -135,9 +136,11 @@
 
   RuntimeService::record_safepoint_begin();
 
-  {
   MutexLocker mu(Safepoint_lock);
 
+  // Reset the count of active JNI critical threads
+  _current_jni_active_count = 0;
+
   // Set number of threads to wait for, before we initiate the callbacks
   _waiting_to_block = nof_threads;
   TryingToBlock     = 0 ;
@@ -375,6 +378,9 @@
 
   OrderAccess::fence();
 
+  // Update the count of active JNI critical regions
+  GC_locker::set_jni_lock_count(_current_jni_active_count);
+
   if (TraceSafepoint) {
     VM_Operation *op = VMThread::vm_operation();
     tty->print_cr("Entering safepoint region: %s", (op != NULL) ? op->name() : "no vm operation");
@@ -392,7 +398,6 @@
     // Record how much time spend on the above cleanup tasks
     update_statistics_on_cleanup_end(os::javaTimeNanos());
   }
-  }
 }
 
 // Wake up all threads, so they are ready to resume execution after the safepoint
@@ -539,6 +544,42 @@
 }
 
 
+// If 'thread' (observed in 'state') is in native code and its last walkable
+// frame is a lazy critical native nmethod, enter the critical section for it
+// and set a flag so the native wrapper returns into the JVM to do the unlock
+// once the native finishes.  In every other case nothing is changed.
+void SafepointSynchronize::check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state) {
+  if (state == _thread_in_native &&
+      thread->has_last_Java_frame() &&
+      thread->frame_anchor()->walkable()) {
+    // This thread might be in a critical native nmethod, so inspect
+    // the code blob of its last frame and enter the critical section
+    // if that blob is a lazy critical native.
+    frame wrapper_frame = thread->last_frame();
+    CodeBlob* stub_cb = wrapper_frame.cb();
+    if (stub_cb != NULL &&
+        stub_cb->is_nmethod() &&
+        stub_cb->as_nmethod_or_null()->is_lazy_critical_native()) {
+      // A thread could potentially be in a critical native across
+      // more than one safepoint, so only update the critical state on
+      // the first one.  When it returns it will perform the unlock.
+      if (!thread->do_critical_native_unlock()) {
+#ifdef ASSERT
+        if (!thread->in_critical()) {
+          GC_locker::increment_debug_jni_lock_count();
+        }
+#endif
+        thread->enter_critical();
+        // Make sure the native wrapper calls back on return to
+        // perform the needed critical unlock.
+        thread->set_critical_native_unlock();
+      }
+    }
+  }
+}
+
+
+
 // -------------------------------------------------------------------------------------------------------
 // Implementation of Safepoint callback point
 
@@ -585,6 +626,11 @@
         _waiting_to_block--;
         thread->safepoint_state()->set_has_called_back(true);
 
+        if (thread->in_critical()) {
+          // Notice that this thread is in a critical section
+          increment_jni_active_count();
+        }
+
         // Consider (_waiting_to_block < 2) to pipeline the wakeup of the VM thread
         if (_waiting_to_block == 0) {
           Safepoint_lock->notify_all();
@@ -861,8 +907,13 @@
   // running, but are actually at a safepoint. We will happily
   // agree and update the safepoint state here.
   if (SafepointSynchronize::safepoint_safe(_thread, state)) {
-      roll_forward(_at_safepoint);
-      return;
+    roll_forward(_at_safepoint);
+    SafepointSynchronize::check_for_lazy_critical_native(_thread, state);
+    if (_thread->in_critical()) {
+      // Notice that this thread is in a critical section
+      SafepointSynchronize::increment_jni_active_count();
+    }
+    return;
   }
 
   if (state == _thread_in_vm) {
--- a/src/share/vm/runtime/safepoint.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/safepoint.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "code/nmethod.hpp"
 #include "memory/allocation.hpp"
 #include "runtime/extendedPC.hpp"
+#include "runtime/mutexLocker.hpp"
 #include "runtime/os.hpp"
 #include "utilities/ostream.hpp"
 
@@ -92,6 +93,7 @@
  private:
   static volatile SynchronizeState _state;     // Threads might read this flag directly, without acquireing the Threads_lock
   static volatile int _waiting_to_block;       // number of threads we are waiting for to block
+  static int _current_jni_active_count;        // Counts the number of active critical natives during the safepoint
 
   // This counter is used for fast versions of jni_Get<Primitive>Field.
   // An even value means there is no ongoing safepoint operations.
@@ -138,6 +140,8 @@
 
   static bool safepoint_safe(JavaThread *thread, JavaThreadState state);
 
+  static void check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state);
+
   // Query
   inline static bool is_at_safepoint()   { return _state == _synchronized;  }
   inline static bool is_synchronizing()  { return _state == _synchronizing;  }
@@ -146,6 +150,11 @@
     return (_state != _not_synchronized);
   }
 
+  inline static void increment_jni_active_count() {
+    assert_locked_or_safepoint(Safepoint_lock); // the counter is a plain static int, so updates rely on this precondition
+    _current_jni_active_count++; // total is later passed to GC_locker::set_jni_lock_count() in safepoint.cpp
+  }
+
   // Called when a thread volantary blocks
   static void   block(JavaThread *thread);
   static void   signal_thread_at_safepoint()              { _waiting_to_block--; }
--- a/src/share/vm/runtime/sharedRuntime.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -121,7 +121,6 @@
 void SharedRuntime::generate_ricochet_blob() {
   if (!EnableInvokeDynamic)  return;  // leave it as a null
 
-#ifndef TARGET_ARCH_NYI_6939861
   // allocate space for the code
   ResourceMark rm;
   // setup code generation tools
@@ -142,7 +141,6 @@
   }
 
   _ricochet_blob = RicochetBlob::create(&buffer, bounce_offset, exception_offset, frame_size_in_words);
-#endif
 }
 
 
@@ -813,10 +811,6 @@
 {
   address target_pc = NULL;
 
-  if (TraceSignals) {
-    tty->print_cr("Searching for continuation for implicit exception at %d!", pc);
-  }
-
   if (Interpreter::contains(pc)) {
 #ifdef CC_INTERP
     // C++ interpreter doesn't throw implicit exceptions
@@ -840,6 +834,7 @@
         if (thread->deopt_mark() != NULL) {
           Deoptimization::cleanup_deopt_info(thread, NULL);
         }
+        Events::log_exception(thread, "StackOverflowError at " INTPTR_FORMAT, pc);
         return StubRoutines::throw_StackOverflowError_entry();
       }
 
@@ -856,8 +851,10 @@
 
           if (vt_stub->is_abstract_method_error(pc)) {
             assert(!vt_stub->is_vtable_stub(), "should never see AbstractMethodErrors from vtable-type VtableStubs");
+            Events::log_exception(thread, "AbstractMethodError at " INTPTR_FORMAT, pc);
             return StubRoutines::throw_AbstractMethodError_entry();
           } else {
+            Events::log_exception(thread, "NullPointerException at vtable entry " INTPTR_FORMAT, pc);
             return StubRoutines::throw_NullPointerException_at_call_entry();
           }
         } else {
@@ -874,6 +871,7 @@
           if (!cb->is_nmethod()) {
             guarantee(cb->is_adapter_blob() || cb->is_method_handles_adapter_blob(),
                       "exception happened outside interpreter, nmethods and vtable stubs (1)");
+            Events::log_exception(thread, "NullPointerException in code blob at " INTPTR_FORMAT, pc);
             // There is no handler here, so we will simply unwind.
             return StubRoutines::throw_NullPointerException_at_call_entry();
           }
@@ -885,6 +883,7 @@
             // => the nmethod is not yet active (i.e., the frame
             // is not set up yet) => use return address pushed by
             // caller => don't push another return address
+            Events::log_exception(thread, "NullPointerException in IC check " INTPTR_FORMAT, pc);
             return StubRoutines::throw_NullPointerException_at_call_entry();
           }
 
@@ -933,9 +932,9 @@
     // for AbortVMOnException flag
     NOT_PRODUCT(Exceptions::debug_check_abort("java.lang.NullPointerException"));
     if (exception_kind == IMPLICIT_NULL) {
-      Events::log("Implicit null exception at " INTPTR_FORMAT " to " INTPTR_FORMAT, pc, target_pc);
+      Events::log_exception(thread, "Implicit null exception at " INTPTR_FORMAT " to " INTPTR_FORMAT, pc, target_pc);
     } else {
-      Events::log("Implicit division by zero exception at " INTPTR_FORMAT " to " INTPTR_FORMAT, pc, target_pc);
+      Events::log_exception(thread, "Implicit division by zero exception at " INTPTR_FORMAT " to " INTPTR_FORMAT, pc, target_pc);
     }
     return target_pc;
   }
@@ -1588,7 +1587,6 @@
   if (caller.is_compiled_frame() && !caller.is_deoptimized_frame()) {
 
     address pc = caller.pc();
-    Events::log("update call-site at pc " INTPTR_FORMAT, pc);
 
     // Default call_addr is the location of the "basic" call.
     // Determine the address of the call we a reresolving. With
@@ -2726,6 +2724,20 @@
   return nm;
 }
 
+JRT_ENTRY_NO_ASYNC(void, SharedRuntime::block_for_jni_critical(JavaThread* thread))
+  assert(thread == JavaThread::current(), "must be");
+  // The code is about to enter a JNI lazy critical native method and
+  // _needs_gc is true.  If this thread is already in a critical
+  // section then just return; otherwise this thread should block
+  // until _needs_gc has been cleared.
+  if (thread->in_critical()) {
+    return;
+  }
+  // Lock and immediately unlock a critical section to give the system a chance to block this thread
+  GC_locker::lock_critical(thread);
+  GC_locker::unlock_critical(thread);
+JRT_END
+
 #ifdef HAVE_DTRACE_H
 // Create a dtrace nmethod for this method.  The wrapper converts the
 // java compiled calling convention to the native convention, makes a dummy call
@@ -2807,7 +2819,7 @@
   // ResourceObject, so do not put any ResourceMarks in here.
   char *s = sig->as_C_string();
   int len = (int)strlen(s);
-  s++; len--;                  // Skip opening paren
+  *s++; len--;                  // Skip opening paren
   char *t = s+len;
   while( *(--t) != ')' ) ;      // Find close paren
 
--- a/src/share/vm/runtime/sharedRuntime.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/sharedRuntime.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -463,6 +463,9 @@
                                           VMRegPair *regs,
                                           BasicType ret_type );
 
+  // Block before entering a JNI critical method
+  static void block_for_jni_critical(JavaThread* thread);
+
 #ifdef HAVE_DTRACE_H
   // Generate a dtrace wrapper for a given method.  The method takes arguments
   // in the Java compiled code convention, marshals them to the native
--- a/src/share/vm/runtime/simpleThresholdPolicy.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/simpleThresholdPolicy.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -177,13 +177,11 @@
 }
 
 nmethod* SimpleThresholdPolicy::event(methodHandle method, methodHandle inlinee,
-                                      int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS) {
+                                      int branch_bci, int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread) {
   if (comp_level == CompLevel_none &&
-      JvmtiExport::can_post_interpreter_events()) {
-    assert(THREAD->is_Java_thread(), "Should be java thread");
-    if (((JavaThread*)THREAD)->is_interp_only_mode()) {
-      return NULL;
-    }
+      JvmtiExport::can_post_interpreter_events() &&
+      thread->is_interp_only_mode()) {
+    return NULL;
   }
   nmethod *osr_nm = NULL;
 
@@ -197,9 +195,9 @@
   }
 
   if (bci == InvocationEntryBci) {
-    method_invocation_event(method, inlinee, comp_level, nm, THREAD);
+    method_invocation_event(method, inlinee, comp_level, nm, thread);
   } else {
-    method_back_branch_event(method, inlinee, bci, comp_level, nm, THREAD);
+    method_back_branch_event(method, inlinee, bci, comp_level, nm, thread);
     // method == inlinee if the event originated in the main method
     int highest_level = inlinee->highest_osr_comp_level();
     if (highest_level > comp_level) {
@@ -210,7 +208,7 @@
 }
 
 // Check if the method can be compiled, change level if necessary
-void SimpleThresholdPolicy::compile(methodHandle mh, int bci, CompLevel level, TRAPS) {
+void SimpleThresholdPolicy::compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread) {
   assert(level <= TieredStopAtLevel, "Invalid compilation level");
   if (level == CompLevel_none) {
     return;
@@ -221,7 +219,7 @@
   // pure C1.
   if (!can_be_compiled(mh, level)) {
     if (level == CompLevel_full_optimization && can_be_compiled(mh, CompLevel_simple)) {
-        compile(mh, bci, CompLevel_simple, THREAD);
+        compile(mh, bci, CompLevel_simple, thread);
     }
     return;
   }
@@ -232,14 +230,14 @@
     if (PrintTieredEvents) {
       print_event(COMPILE, mh, mh, bci, level);
     }
-    submit_compile(mh, bci, level, THREAD);
+    submit_compile(mh, bci, level, thread);
   }
 }
 
 // Tell the broker to compile the method
-void SimpleThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS) {
+void SimpleThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread) {
   int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count();
-  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", THREAD);
+  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", thread);
 }
 
 // Call and loop predicates determine whether a transition to a higher
@@ -366,11 +364,11 @@
 
 // Handle the invocation event.
 void SimpleThresholdPolicy::method_invocation_event(methodHandle mh, methodHandle imh,
-                                              CompLevel level, nmethod* nm, TRAPS) {
+                                              CompLevel level, nmethod* nm, JavaThread* thread) {
   if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, InvocationEntryBci)) {
     CompLevel next_level = call_event(mh(), level);
     if (next_level != level) {
-      compile(mh, InvocationEntryBci, next_level, THREAD);
+      compile(mh, InvocationEntryBci, next_level, thread);
     }
   }
 }
@@ -378,7 +376,7 @@
 // Handle the back branch event. Notice that we can compile the method
 // with a regular entry from here.
 void SimpleThresholdPolicy::method_back_branch_event(methodHandle mh, methodHandle imh,
-                                                     int bci, CompLevel level, nmethod* nm, TRAPS) {
+                                                     int bci, CompLevel level, nmethod* nm, JavaThread* thread) {
   // If the method is already compiling, quickly bail out.
   if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, bci)) {
     // Use loop event as an opportinity to also check there's been
@@ -391,13 +389,13 @@
                       next_osr_level < CompLevel_full_optimization ? next_osr_level : cur_level);
     bool is_compiling = false;
     if (next_level != cur_level) {
-      compile(mh, InvocationEntryBci, next_level, THREAD);
+      compile(mh, InvocationEntryBci, next_level, thread);
       is_compiling = true;
     }
 
     // Do the OSR version
     if (!is_compiling && next_osr_level != level) {
-      compile(mh, bci, next_osr_level, THREAD);
+      compile(mh, bci, next_osr_level, thread);
     }
   }
 }
--- a/src/share/vm/runtime/simpleThresholdPolicy.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/simpleThresholdPolicy.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -67,9 +67,9 @@
   // Print policy-specific information if necessary
   virtual void print_specific(EventType type, methodHandle mh, methodHandle imh, int bci, CompLevel level) { }
   // Check if the method can be compiled, change level if necessary
-  void compile(methodHandle mh, int bci, CompLevel level, TRAPS);
+  void compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread);
   // Submit a given method for compilation
-  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS);
+  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, JavaThread* thread);
   // Simple methods are as good being compiled with C1 as C2.
   // This function tells if it's such a function.
   inline bool is_trivial(methodOop method);
@@ -88,9 +88,9 @@
     return CompLevel_none;
   }
   virtual void method_invocation_event(methodHandle method, methodHandle inlinee,
-                                       CompLevel level, nmethod* nm, TRAPS);
+                                       CompLevel level, nmethod* nm, JavaThread* thread);
   virtual void method_back_branch_event(methodHandle method, methodHandle inlinee,
-                                        int bci, CompLevel level, nmethod* nm, TRAPS);
+                                        int bci, CompLevel level, nmethod* nm, JavaThread* thread);
 public:
   SimpleThresholdPolicy() : _c1_count(0), _c2_count(0) { }
   virtual int compiler_count(CompLevel comp_level) {
@@ -104,7 +104,7 @@
   virtual void disable_compilation(methodOop method) { }
   virtual void reprofile(ScopeDesc* trap_scope, bool is_osr);
   virtual nmethod* event(methodHandle method, methodHandle inlinee,
-                         int branch_bci, int bci, CompLevel comp_level, nmethod* nm, TRAPS);
+                         int branch_bci, int bci, CompLevel comp_level, nmethod* nm, JavaThread* thread);
   // Select task is called by CompileBroker. We should return a task or NULL.
   virtual CompileTask* select_task(CompileQueue* compile_queue);
   // Tell the runtime if we think a given method is adequately profiled.
--- a/src/share/vm/runtime/sweeper.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/sweeper.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -266,7 +266,17 @@
 
     // The last invocation iterates until there are no more nmethods
     for (int i = 0; (i < todo || _invocations == 1) && _current != NULL; i++) {
+      if (SafepointSynchronize::is_synchronizing()) { // Safepoint request
+        if (PrintMethodFlushing && Verbose) {
+          tty->print_cr("### Sweep at %d out of %d, invocation: %d, yielding to safepoint", _seen, CodeCache::nof_nmethods(), _invocations);
+        }
+        MutexUnlockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
 
+        assert(Thread::current()->is_Java_thread(), "should be java thread");
+        JavaThread* thread = (JavaThread*)Thread::current();
+        ThreadBlockInVM tbivm(thread);
+        thread->java_suspend_self();
+      }
       // Since we will give up the CodeCache_lock, always skip ahead
       // to the next nmethod.  Other blobs can be deleted by other
       // threads but nmethods are only reclaimed by the sweeper.
--- a/src/share/vm/runtime/thread.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/thread.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,7 @@
 #include "interpreter/linkResolver.hpp"
 #include "interpreter/oopMapCache.hpp"
 #include "jvmtifiles/jvmtiEnv.hpp"
+#include "memory/gcLocker.inline.hpp"
 #include "memory/oopFactory.hpp"
 #include "memory/universe.inline.hpp"
 #include "oops/instanceKlass.hpp"
@@ -76,6 +77,7 @@
 #include "services/attachListener.hpp"
 #include "services/management.hpp"
 #include "services/threadService.hpp"
+#include "trace/traceEventTypes.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
@@ -235,6 +237,7 @@
   CHECK_UNHANDLED_OOPS_ONLY(_gc_locked_out_count = 0;)
   _jvmti_env_iteration_count = 0;
   set_allocated_bytes(0);
+  set_trace_buffer(NULL);
   _vm_operation_started_count = 0;
   _vm_operation_completed_count = 0;
   _current_pending_monitor = NULL;
@@ -1519,6 +1522,10 @@
     JvmtiExport::post_thread_start(this);
   }
 
+  EVENT_BEGIN(TraceEventThreadStart, event);
+  EVENT_COMMIT(event,
+     EVENT_SET(event, javalangthread, java_lang_Thread::thread_id(this->threadObj())));
+
   // We call another function to do the rest so we are sure that the stack addresses used
   // from there will be lower than the stack base just computed
   thread_main_inner();
@@ -1601,8 +1608,6 @@
     // java.lang.Thread.dispatchUncaughtException
     if (uncaught_exception.not_null()) {
       Handle group(this, java_lang_Thread::threadGroup(threadObj()));
-      Events::log("uncaught exception INTPTR_FORMAT " " INTPTR_FORMAT " " INTPTR_FORMAT",
-        (address)uncaught_exception(), (address)threadObj(), (address)group());
       {
         EXCEPTION_MARK;
         // Check if the method Thread.dispatchUncaughtException() exists. If so
@@ -1648,6 +1653,15 @@
       }
     }
 
+    // Called before the java thread exit since we want to read info
+    // from java_lang_Thread object
+    EVENT_BEGIN(TraceEventThreadEnd, event);
+    EVENT_COMMIT(event,
+        EVENT_SET(event, javalangthread, java_lang_Thread::thread_id(this->threadObj())));
+
+    // Call after last event on thread
+    EVENT_THREAD_EXIT(this);
+
     // Call Thread.exit(). We try 3 times in case we got another Thread.stop during
     // the execution of the method. If that is not enough, then we don't really care. Thread.stop
     // is deprecated anyhow.
@@ -2274,6 +2288,26 @@
   }
 }
 
+// This is a variant of the normal
+// check_special_condition_for_native_trans with slightly different
+// semantics for use by critical native wrappers.  It does all the
+// normal checks but also performs the transition back into
+// thread_in_Java state.  This is required so that critical natives
+// can potentially block and perform a GC if they are the last thread
+// exiting the GC_locker.
+void JavaThread::check_special_condition_for_native_trans_and_transition(JavaThread *thread) {
+  check_special_condition_for_native_trans(thread);
+
+  // Finish the transition
+  thread->set_thread_state(_thread_in_Java);
+
+  if (thread->do_critical_native_unlock()) {
+    ThreadInVMfromJavaNoAsyncException tiv(thread);
+    GC_locker::unlock_critical(thread);
+    thread->clear_critical_native_unlock();
+  }
+}
+
 // We need to guarantee the Threads_lock here, since resumes are not
 // allowed during safepoint synchronization
 // Can only resume from an external suspension
@@ -3334,6 +3368,7 @@
       initialize_class(vmSymbols::java_lang_ArithmeticException(), CHECK_0);
       initialize_class(vmSymbols::java_lang_StackOverflowError(), CHECK_0);
       initialize_class(vmSymbols::java_lang_IllegalMonitorStateException(), CHECK_0);
+      initialize_class(vmSymbols::java_lang_IllegalArgumentException(), CHECK_0);
     } else {
       warning("java.lang.OutOfMemoryError has not been initialized");
       warning("java.lang.NullPointerException has not been initialized");
@@ -3341,6 +3376,7 @@
       warning("java.lang.ArrayStoreException has not been initialized");
       warning("java.lang.ArithmeticException has not been initialized");
       warning("java.lang.StackOverflowError has not been initialized");
+      warning("java.lang.IllegalArgumentException has not been initialized");
     }
   }
 
@@ -3370,6 +3406,11 @@
 
   quicken_jni_functions();
 
+  // Must be run after init_ft which initializes ft_enabled
+  if (TRACE_INITIALIZE() != JNI_OK) {
+    vm_exit_during_initialization("Failed to initialize tracing backend");
+  }
+
   // Set flag that basic initialization has completed. Used by exceptions and various
   // debug stuff, that does not work until all basic classes have been initialized.
   set_init_completed();
@@ -3430,6 +3471,10 @@
     create_vm_init_libraries();
   }
 
+  if (!TRACE_START()) {
+    vm_exit_during_initialization(Handle(THREAD, PENDING_EXCEPTION));
+  }
+
   // Notify JVMTI agents that VM initialization is complete - nop if no agents.
   JvmtiExport::post_vm_initialized();
 
@@ -3868,7 +3913,7 @@
   ThreadService::add_thread(p, daemon);
 
   // Possible GC point.
-  Events::log("Thread added: " INTPTR_FORMAT, p);
+  Events::log(p, "Thread added: " INTPTR_FORMAT, p);
 }
 
 void Threads::remove(JavaThread* p) {
@@ -3913,7 +3958,7 @@
   } // unlock Threads_lock
 
   // Since Events::log uses a lock, we grab it outside the Threads_lock
-  Events::log("Thread exited: " INTPTR_FORMAT, p);
+  Events::log(p, "Thread exited: " INTPTR_FORMAT, p);
 }
 
 // Threads_lock must be held when this is called (or must be called during a safepoint)
@@ -4022,9 +4067,7 @@
   {
     MutexLockerEx ml(doLock ? Threads_lock : NULL);
     ALL_JAVA_THREADS(p) {
-      
-      // (tw) May we do this?
-      //if (p->is_Compiler_thread()) continue;
+      if (p->is_Compiler_thread()) continue;
 
       address pending = (address)p->current_pending_monitor();
       if (pending == monitor) {             // found a match
--- a/src/share/vm/runtime/thread.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/thread.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,6 +41,7 @@
 #include "runtime/stubRoutines.hpp"
 #include "runtime/threadLocalStorage.hpp"
 #include "runtime/unhandledOops.hpp"
+#include "trace/tracing.hpp"
 #include "utilities/exceptions.hpp"
 #include "utilities/top.hpp"
 #ifndef SERIALGC
@@ -181,7 +182,8 @@
     _ext_suspended          = 0x40000000U, // thread has self-suspended
     _deopt_suspend          = 0x10000000U, // thread needs to self suspend for deopt
 
-    _has_async_exception    = 0x00000001U  // there is a pending async exception
+    _has_async_exception    = 0x00000001U, // there is a pending async exception
+    _critical_native_unlock = 0x00000002U  // Must call back to unlock JNI critical lock
   };
 
   // various suspension related flags - atomically updated
@@ -246,6 +248,8 @@
   jlong _allocated_bytes;                       // Cumulative number of bytes allocated on
                                                 // the Java heap
 
+  TRACE_BUFFER _trace_buffer;                   // Thread-local buffer for tracing
+
   int   _vm_operation_started_count;            // VM_Operation support
   int   _vm_operation_completed_count;          // VM_Operation support
 
@@ -347,6 +351,15 @@
     clear_suspend_flag(_has_async_exception);
   }
 
+  bool do_critical_native_unlock() const { return (_suspend_flags & _critical_native_unlock) != 0; }
+
+  void set_critical_native_unlock() {
+    set_suspend_flag(_critical_native_unlock);
+  }
+  void clear_critical_native_unlock() {
+    clear_suspend_flag(_critical_native_unlock);
+  }
+
   // Support for Unhandled Oop detection
 #ifdef CHECK_UNHANDLED_OOPS
  private:
@@ -414,6 +427,9 @@
     return allocated_bytes;
   }
 
+  TRACE_BUFFER trace_buffer()              { return _trace_buffer; }
+  void set_trace_buffer(TRACE_BUFFER buf)  { _trace_buffer = buf; }
+
   // VM operation support
   int vm_operation_ticket()                      { return ++_vm_operation_started_count; }
   int vm_operation_completed_count()             { return _vm_operation_completed_count; }
@@ -1053,6 +1069,11 @@
   // Check for async exception in addition to safepoint and suspend request.
   static void check_special_condition_for_native_trans(JavaThread *thread);
 
+  // Same as check_special_condition_for_native_trans but finishes the
+  // transition into thread_in_Java mode so that it can potentially
+  // block.
+  static void check_special_condition_for_native_trans_and_transition(JavaThread *thread);
+
   bool is_ext_suspend_completed(bool called_by_wait, int delay, uint32_t *bits);
   bool is_ext_suspend_completed_with_lock(uint32_t *bits) {
     MutexLockerEx ml(SR_lock(), Mutex::_no_safepoint_check_flag);
@@ -1330,8 +1351,10 @@
 
   // JNI critical regions. These can nest.
   bool in_critical()    { return _jni_active_critical > 0; }
-  void enter_critical() { assert(Thread::current() == this,
-                                 "this must be current thread");
+  bool in_last_critical()  { return _jni_active_critical == 1; }
+  void enter_critical() { assert(Thread::current() == this ||
+                                 Thread::current()->is_VM_thread() && SafepointSynchronize::is_synchronizing(),
+                                 "this must be current thread or synchronizing");
                           _jni_active_critical++; }
   void exit_critical()  { assert(Thread::current() == this,
                                  "this must be current thread");
--- a/src/share/vm/runtime/vframeArray.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/vframeArray.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -127,6 +127,7 @@
 
   // Now the expressions off-stack
   // Same silliness as above
+
   StackValueCollection *exprs = vf->expressions();
   _expressions = new StackValueCollection(exprs->size());
   for(index = 0; index < exprs->size(); index++) {
@@ -317,7 +318,8 @@
   // only unpacks the part of the expression stack not used by callee
   // as parameters. The callee parameters are unpacked as part of the
   // callee locals.
-  for(int i = 0; i < expressions()->size(); i++) {
+  int i;
+  for(i = 0; i < expressions()->size(); i++) {
     StackValue *value = expressions()->at(i);
     intptr_t*   addr  = iframe()->interpreter_frame_expression_stack_at(i);
     switch(value->type()) {
@@ -354,7 +356,7 @@
 
 
   // Unpack the locals
-  for(int i = 0; i < locals()->size(); i++) {
+  for(i = 0; i < locals()->size(); i++) {
     StackValue *value = locals()->at(i);
     intptr_t* addr  = iframe()->interpreter_frame_local_at(i);
     switch(value->type()) {
--- a/src/share/vm/runtime/virtualspace.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/virtualspace.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -455,7 +455,7 @@
 
 void ReservedSpace::protect_noaccess_prefix(const size_t size) {
   assert( (_noaccess_prefix != 0) == (UseCompressedOops && _base != NULL &&
-                                      (size_t(_base + _size) > OopEncodingHeapMax) &&
+                                      (Universe::narrow_oop_base() != NULL) &&
                                       Universe::narrow_oop_use_implicit_null_checks()),
          "noaccess_prefix should be used only with non zero based compressed oops");
 
--- a/src/share/vm/runtime/vmStructs.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/vmStructs.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -295,7 +295,7 @@
   nonstatic_field(instanceKlass,               _nof_implementors,                             int)                                   \
   nonstatic_field(instanceKlass,               _implementors[0],                              klassOop)                              \
   nonstatic_field(instanceKlass,               _fields,                                       typeArrayOop)                          \
-  nonstatic_field(instanceKlass,               _java_fields_count,                             int)                                   \
+  nonstatic_field(instanceKlass,               _java_fields_count,                            u2)                                    \
   nonstatic_field(instanceKlass,               _constants,                                    constantPoolOop)                       \
   nonstatic_field(instanceKlass,               _class_loader,                                 oop)                                   \
   nonstatic_field(instanceKlass,               _protection_domain,                            oop)                                   \
@@ -305,16 +305,16 @@
   nonstatic_field(instanceKlass,               _inner_classes,                                typeArrayOop)                          \
   nonstatic_field(instanceKlass,               _nonstatic_field_size,                         int)                                   \
   nonstatic_field(instanceKlass,               _static_field_size,                            int)                                   \
-  nonstatic_field(instanceKlass,               _static_oop_field_count,                       int)                                   \
+  nonstatic_field(instanceKlass,               _static_oop_field_count,                       u2)                                   \
   nonstatic_field(instanceKlass,               _nonstatic_oop_map_size,                       int)                                   \
   nonstatic_field(instanceKlass,               _is_marked_dependent,                          bool)                                  \
   nonstatic_field(instanceKlass,               _minor_version,                                u2)                                    \
   nonstatic_field(instanceKlass,               _major_version,                                u2)                                    \
-  nonstatic_field(instanceKlass,               _init_state,                                   instanceKlass::ClassState)             \
+  nonstatic_field(instanceKlass,               _init_state,                                   u1)                                    \
   nonstatic_field(instanceKlass,               _init_thread,                                  Thread*)                               \
   nonstatic_field(instanceKlass,               _vtable_len,                                   int)                                   \
   nonstatic_field(instanceKlass,               _itable_len,                                   int)                                   \
-  nonstatic_field(instanceKlass,               _reference_type,                               ReferenceType)                         \
+  nonstatic_field(instanceKlass,               _reference_type,                               u1)                                    \
   volatile_nonstatic_field(instanceKlass,      _oop_map_cache,                                OopMapCache*)                          \
   nonstatic_field(instanceKlass,               _jni_ids,                                      JNIid*)                                \
   nonstatic_field(instanceKlass,               _osr_nmethods_head,                            nmethod*)                              \
@@ -1362,6 +1362,7 @@
   /* The compiler thinks this is a different type than */                 \
   /* unsigned short on Win32 */                                           \
   declare_unsigned_integer_type(u2)                                       \
+  declare_unsigned_integer_type(u1)                                       \
   declare_unsigned_integer_type(unsigned)                                 \
                                                                           \
   /*****************************/                                         \
@@ -2260,13 +2261,6 @@
                                                                           \
   declare_constant(SymbolTable::symbol_table_size)                        \
                                                                           \
-  /********************/                                                  \
-  /* SystemDictionary */                                                  \
-  /********************/                                                  \
-                                                                          \
-  declare_constant(SystemDictionary::_loader_constraint_size)             \
-  declare_constant(SystemDictionary::_nof_buckets)                        \
-                                                                          \
   /***********************************/                                   \
   /* LoaderConstraintTable constants */                                   \
   /***********************************/                                   \
--- a/src/share/vm/runtime/vm_operations.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/vm_operations.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -93,6 +93,7 @@
   template(HeapWalkOperation)                     \
   template(HeapIterateOperation)                  \
   template(ReportJavaOutOfMemory)                 \
+  template(JFRCheckpoint)                         \
   template(Exit)                                  \
 
 class VM_Operation: public CHeapObj {
--- a/src/share/vm/runtime/vm_version.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/vm_version.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -169,6 +169,13 @@
   return VM_RELEASE;
 }
 
+// NOTE: do *not* use stringStream. This function is called by
+//       fatal error handlers. If the crash is in a native thread,
+//       stringStream cannot get resource allocated and will SEGV.
+const char* Abstract_VM_Version::jre_release_version() {
+  return JRE_RELEASE_VERSION;
+}
+
 #define OS       LINUX_ONLY("linux")             \
                  WINDOWS_ONLY("windows")         \
                  SOLARIS_ONLY("solaris")         \
--- a/src/share/vm/runtime/vm_version.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/runtime/vm_version.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -71,6 +71,7 @@
 
   // Internal version providing additional build information
   static const char* internal_vm_info_string();
+  static const char* jre_release_version();
 
   // does HW support an 8-byte compare-exchange operation?
   static bool supports_cx8()  {return _supports_cx8;}
--- a/src/share/vm/services/attachListener.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/attachListener.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -99,6 +99,7 @@
 }
 
 // Implementation of "properties" command.
+// See also: PrintSystemPropertiesDCmd class
 static jint get_system_properties(AttachOperation* op, outputStream* out) {
   return get_properties(op, out, vmSymbols::serializePropertiesToByteArray_name());
 }
@@ -127,6 +128,7 @@
 }
 
 // Implementation of "threaddump" command - essentially a remote ctrl-break
+// See also: ThreadDumpDCmd class
 //
 static jint thread_dump(AttachOperation* op, outputStream* out) {
   bool print_concurrent_locks = false;
@@ -158,6 +160,7 @@
   DCmd::parse_and_execute(out, op->arg(0), ' ', THREAD);
   if (HAS_PENDING_EXCEPTION) {
     java_lang_Throwable::print(PENDING_EXCEPTION, out);
+    out->cr();
     CLEAR_PENDING_EXCEPTION;
     // The exception has been printed on the output stream
     // If the JVM returns JNI_ERR, the attachAPI throws a generic I/O
@@ -169,6 +172,7 @@
 
 #ifndef SERVICES_KERNEL   // Heap dumping not supported
 // Implementation of "dumpheap" command.
+// See also: HeapDumpDCmd class
 //
 // Input arguments :-
 //   arg0: Name of the dump file
@@ -211,6 +215,7 @@
 #endif // SERVICES_KERNEL
 
 // Implementation of "inspectheap" command
+// See also: ClassHistogramDCmd class
 //
 // Input arguments :-
 //   arg0: "-live" or "-all"
@@ -354,6 +359,7 @@
 }
 
 // Implementation of "printflag" command
+// See also: PrintVMFlagsDCmd class
 static jint print_flag(AttachOperation* op, outputStream* out) {
   const char* name = NULL;
   if ((name = op->arg(0)) == NULL) {
--- a/src/share/vm/services/diagnosticArgument.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/diagnosticArgument.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012 Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,9 +28,16 @@
 #include "services/diagnosticArgument.hpp"
 
 void GenDCmdArgument::read_value(const char* str, size_t len, TRAPS) {
-  if (is_set()) {
+  /* NOTE: Some argument types don't require a value,
+   * for instance boolean arguments: "enableFeatureX" is
+   * equivalent to "enableFeatureX=true". In these cases,
+   * str will be null. This is perfectly valid.
+   * All argument types must perform null checks on str.
+   */
+
+  if (is_set() && !allow_multiple()) {
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
-            "Duplicates in diagnostic command arguments");
+            "Duplicates in diagnostic command arguments\n");
   }
   parse_value(str, len, CHECK);
   set_is_set(true);
@@ -38,9 +45,9 @@
 
 template <> void DCmdArgument<jlong>::parse_value(const char* str,
                                                   size_t len, TRAPS) {
-  if (sscanf(str, INT64_FORMAT, &_value) != 1) {
+    if (str == NULL || sscanf(str, INT64_FORMAT, &_value) != 1) {
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
-      "Integer parsing error in diagnostic command arguments");
+      "Integer parsing error in diagnostic command arguments\n");
   }
 }
 
@@ -59,12 +66,13 @@
 
 template <> void DCmdArgument<bool>::parse_value(const char* str,
                                                  size_t len, TRAPS) {
+  // len is the length of the current token starting at str
   if (len == 0) {
     set_value(true);
   } else {
-    if (strcasecmp(str, "true") == 0) {
+    if (len == strlen("true") && strncasecmp(str, "true", len) == 0) {
        set_value(true);
-    } else if (strcasecmp(str, "false") == 0) {
+    } else if (len == strlen("false") && strncasecmp(str, "false", len) == 0) {
        set_value(false);
     } else {
       THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
@@ -88,16 +96,20 @@
 
 template <> void DCmdArgument<char*>::parse_value(const char* str,
                                                   size_t len, TRAPS) {
-  _value = NEW_C_HEAP_ARRAY(char, len+1);
-  strncpy(_value, str, len);
-  _value[len] = 0;
+  if (str == NULL) {
+    _value = NULL;
+  } else {
+    _value = NEW_C_HEAP_ARRAY(char, len+1);
+    strncpy(_value, str, len);
+    _value[len] = 0;
+  }
 }
 
 template <> void DCmdArgument<char*>::init_value(TRAPS) {
-  if (has_default()) {
+  if (has_default() && _default_string != NULL) {
     this->parse_value(_default_string, strlen(_default_string), THREAD);
     if (HAS_PENDING_EXCEPTION) {
-      fatal("Default string must be parsable");
+     fatal("Default string must be parsable");
     }
   } else {
     set_value(NULL);
@@ -110,3 +122,153 @@
     set_value(NULL);
   }
 }
+
+template <> void DCmdArgument<NanoTimeArgument>::parse_value(const char* str,
+                                                 size_t len, TRAPS) {
+  if (str == NULL) {  // no token at all: nothing to parse
+    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+              "Integer parsing error nanotime value: syntax error");
+  }
+  // Expected form is "<integer><unit>"; read the integer part into _time first.
+  int argc = sscanf(str, INT64_FORMAT , &_value._time);
+  if (argc != 1) {
+    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+              "Integer parsing error nanotime value: syntax error");
+  }
+  size_t idx = 0;  // becomes the index of the first non-digit, i.e. the start of the unit
+  while(idx < len && isdigit(str[idx])) {  // NOTE(review): a leading sign is not skipped here - confirm signed input is meant to be rejected below
+    idx++;
+  }
+  if (idx == len) {
+    // only accept missing unit if the value is 0
+    if (_value._time != 0) {
+      THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+                "Integer parsing error nanotime value: unit required");
+    } else {
+      _value._nanotime = 0;
+      strcpy(_value._unit, "ns");
+      return;
+    }
+  } else if(len - idx > 2) {  // more than two characters after the digits cannot be a unit
+    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+              "Integer parsing error nanotime value: illegal unit");
+  } else {
+    strncpy(_value._unit, &str[idx], len - idx);
+    /*Write an extra null termination. This is safe because _value._unit
+     * is declared as char[3], and length is checked to be not larger than
+     * two above. Also, this is necessary, since length might be 1, and the
+     * default value already in the string is ns, which is two chars.
+     */
+    _value._unit[len-idx] = '\0';
+  }
+  // Scale _time into nanoseconds according to the unit that was read.
+  if (strcmp(_value._unit, "ns") == 0) {
+    _value._nanotime = _value._time;
+  } else if (strcmp(_value._unit, "us") == 0) {
+    _value._nanotime = _value._time * 1000;
+  } else if (strcmp(_value._unit, "ms") == 0) {
+    _value._nanotime = _value._time * 1000 * 1000;
+  } else if (strcmp(_value._unit, "s") == 0) {
+    _value._nanotime = _value._time * 1000 * 1000 * 1000;
+  } else if (strcmp(_value._unit, "m") == 0) {
+    _value._nanotime = _value._time * 60 * 1000 * 1000 * 1000;
+  } else if (strcmp(_value._unit, "h") == 0) {
+    _value._nanotime = _value._time * 60 * 60 * 1000 * 1000 * 1000;
+  } else if (strcmp(_value._unit, "d") == 0) {
+    _value._nanotime = _value._time * 24 * 60 * 60 * 1000 * 1000 * 1000;
+  } else {  // the suffix is none of ns, us, ms, s, m, h, d
+     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+               "Integer parsing error nanotime value: illegal unit");
+  }
+}
+
+template <> void DCmdArgument<NanoTimeArgument>::init_value(TRAPS) {
+  if (has_default()) {  // a default string was declared: it has to parse cleanly
+    this->parse_value(_default_string, strlen(_default_string), THREAD);
+    if (HAS_PENDING_EXCEPTION) {
+      fatal("Default string must be parsable");
+    }
+  } else {  // no default: start from a zero duration expressed in nanoseconds
+    _value._time = 0;
+    _value._nanotime = 0;
+    strcpy(_value._unit, "ns");  // store the unit; a comparison here would leave _unit unset
+  }
+}
+
+template <> void DCmdArgument<NanoTimeArgument>::destroy_value() { }  // intentionally empty: this specialization releases nothing
+
+// WARNING StringArrayArgument can only be used as an option, it cannot be
+// used as an argument with the DCmdParser
+
+template <> void DCmdArgument<StringArrayArgument*>::parse_value(const char* str,
+                                                  size_t len, TRAPS) {
+  _value->add(str,len);  // append this occurrence to the array; _value is allocated by init_value()
+}
+
+template <> void DCmdArgument<StringArrayArgument*>::init_value(TRAPS) {
+  _value = new StringArrayArgument();  // fresh, empty array for this argument
+  _allow_multiple = true;  // reported through allow_multiple(); presumably lets the option be repeated - confirm in the parser
+  if (has_default()) {  // a default string is a declaration error for this argument type
+    fatal("StringArrayArgument cannot have default value");
+  }
+}
+
+template <> void DCmdArgument<StringArrayArgument*>::destroy_value() {
+  // Nothing allocated, or already released: leave the NULL value alone.
+  if (_value == NULL) return;
+  delete _value;
+  set_value(NULL);
+}
+
+template <> void DCmdArgument<MemorySizeArgument>::parse_value(const char* str,
+                                                  size_t len, TRAPS) {
+  if (str == NULL) {  // no token at all: nothing to parse
+    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+              "Parsing error memory size value: syntax error");
+  }
+  // Expected form is "<unsigned integer>[k|K|m|M|g|G]"; the size in bytes ends up in _size.
+  if (*str == '-') {
+    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+               "Parsing error memory size value: negative values not allowed");
+  }
+  int res = sscanf(str, UINT64_FORMAT "%c", &_value._val, &_value._multiplier);
+  if (res == 2) {  // a number followed by at least one more character
+     switch (_value._multiplier) {
+      case 'k': case 'K':
+         _value._size = _value._val * 1024;
+         break;
+      case 'm': case 'M':
+         _value._size = _value._val * 1024 * 1024;
+         break;
+      case 'g': case 'G':
+         _value._size = _value._val * 1024 * 1024 * 1024;
+         break;
+       default:
+         _value._size = _value._val;
+         _value._multiplier = ' ';
+         //default case should be to break with no error, since user
+         //can write size in bytes, or might have a delimiter and next arg
+         break;
+     }
+   } else if (res == 1) {  // bare number: the value is already in bytes
+     _value._size = _value._val;
+   } else {  // not even a number could be read
+     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+               "Parsing error memory size value: invalid value");
+   }
+}
+
+template <> void DCmdArgument<MemorySizeArgument>::init_value(TRAPS) {
+  if (!has_default()) {
+    // No default declared: start from a zero size with a blank multiplier.
+    _value._val = 0;
+    _value._size = 0;
+    _value._multiplier = ' ';
+    return;
+  }
+  // A declared default that does not parse is treated as fatal.
+  this->parse_value(_default_string, strlen(_default_string), THREAD);
+  if (HAS_PENDING_EXCEPTION) fatal("Default string must be parsable");
+}
+
+template <> void DCmdArgument<MemorySizeArgument>::destroy_value() { }  // intentionally empty: this specialization releases nothing
--- a/src/share/vm/services/diagnosticArgument.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/diagnosticArgument.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,49 @@
 #include "runtime/thread.hpp"
 #include "utilities/exceptions.hpp"
 
+class StringArrayArgument : public CHeapObj {  // list of C-heap copies of the strings given for one argument
+private:
+  GrowableArray<char*>* _array;  // holds the copies made by add(); freed in the destructor
+public:
+  StringArrayArgument() {
+    _array = new(ResourceObj::C_HEAP)GrowableArray<char *>(32, true);
+    assert(_array != NULL, "Sanity check");
+  }
+  void add(const char* str, size_t len) {  // appends a NUL-terminated copy of the first len chars; a NULL str is ignored
+    if (str != NULL) {
+      char* ptr = NEW_C_HEAP_ARRAY(char, len+1);
+      strncpy(ptr, str, len);
+      ptr[len] = 0;  // strncpy does not terminate when str has len or more characters
+      _array->append(ptr);
+    }
+  }
+  GrowableArray<char*>* array() {  // direct access to the underlying array (not a copy)
+    return _array;
+  }
+  ~StringArrayArgument() {  // frees every stored string, then the array itself
+    for (int i=0; i<_array->length(); i++) {
+      if(_array->at(i) != NULL) { // Safety check
+        FREE_C_HEAP_ARRAY(char, _array->at(i));
+      }
+    }
+    delete _array;
+  }
+};
+
+class NanoTimeArgument {  // value type filled in by DCmdArgument<NanoTimeArgument>
+public:
+  jlong _nanotime;  // _time converted to nanoseconds
+  jlong _time;      // number as written, before unit scaling
+  char _unit[3];    // unit suffix: at most two characters plus the terminator
+};
+
+class MemorySizeArgument {  // value type filled in by DCmdArgument<MemorySizeArgument>
+public:
+  u8 _size;          // _val scaled by the multiplier, i.e. the size in bytes
+  u8 _val;           // number as written, before scaling
+  char _multiplier;  // character read after the number; set to ' ' when it is not k, m or g
+};
+
 class GenDCmdArgument : public ResourceObj {
 protected:
   GenDCmdArgument* _next;
@@ -40,6 +83,7 @@
   const char*      _default_string;
   bool             _is_set;
   bool             _is_mandatory;
+  bool             _allow_multiple;
   GenDCmdArgument(const char* name, const char* description, const char* type,
                   const char* default_string, bool mandatory) {
     _name = name;
@@ -48,6 +92,7 @@
     _default_string = default_string;
     _is_mandatory = mandatory;
     _is_set = false;
+    _allow_multiple = false;
   };
 public:
   const char* name() { return _name; }
@@ -56,6 +101,7 @@
   const char* default_string() { return _default_string; }
   bool is_set() { return _is_set; }
   void set_is_set(bool b) { _is_set = b; }
+  bool allow_multiple() { return _allow_multiple; }
   bool is_mandatory() { return _is_mandatory; }
   bool has_value() { return _is_set || _default_string != NULL; }
   bool has_default() { return _default_string != NULL; }
--- a/src/share/vm/services/diagnosticCommand.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/diagnosticCommand.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -23,11 +23,42 @@
  */
 
 #include "precompiled.hpp"
+#include "gc_implementation/shared/vmGCOperations.hpp"
+#include "runtime/javaCalls.hpp"
 #include "services/diagnosticArgument.hpp"
 #include "services/diagnosticCommand.hpp"
 #include "services/diagnosticFramework.hpp"
+#include "services/heapDumper.hpp"
+#include "services/management.hpp"
 
-HelpDCmd::HelpDCmd(outputStream* output, bool heap) : DCmd(output, heap),
+void DCmdRegistrant::register_dcmds(){  // registers one factory per built-in diagnostic command
+  // Registration of the diagnostic commands
+  // First boolean argument specifies if the command is enabled
+  // Second boolean argument specifies if the command is hidden
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<HelpDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<VersionDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<CommandLineDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<PrintSystemPropertiesDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<PrintVMFlagsDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<VMUptimeDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<SystemGCDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<RunFinalizationDCmd>(true, false));
+#ifndef SERVICES_KERNEL   // Heap dumping not supported
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<HeapDumpDCmd>(true, false));
+#endif // SERVICES_KERNEL
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<ClassHistogramDCmd>(true, false));
+  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<ThreadDumpDCmd>(true, false));
+
+}
+
+#ifndef HAVE_EXTRA_DCMD
+void DCmdRegistrant::register_dcmds_ext(){
+   // Default (empty) hook, compiled only when HAVE_EXTRA_DCMD is not defined.
+}
+#endif
+
+
+HelpDCmd::HelpDCmd(outputStream* output, bool heap) : DCmdWithParser(output, heap),
   _all("-all", "Show help for all commands", "BOOLEAN", false, "false"),
   _cmd("command name", "The name of the command for which we want help",
         "STRING", false) {
@@ -35,14 +66,6 @@
   _dcmdparser.add_dcmd_argument(&_cmd);
 };
 
-void HelpDCmd::parse(CmdLine* line, char delim, TRAPS) {
-  _dcmdparser.parse(line, delim, CHECK);
-}
-
-void HelpDCmd::print_help(outputStream* out) {
-  _dcmdparser.print_help(out, name());
-}
-
 void HelpDCmd::execute(TRAPS) {
   if (_all.value()) {
     GrowableArray<const char*>* cmd_list = DCmdFactory::DCmd_list();
@@ -66,10 +89,11 @@
                          factory->is_enabled() ? "" : " [disabled]");
       output()->print_cr(factory->description());
       output()->print_cr("\nImpact: %s", factory->impact());
+      output()->cr();
       cmd = factory->create_resource_instance(output());
       if (cmd != NULL) {
         DCmdMark mark(cmd);
-        cmd->print_help(output());
+        cmd->print_help(factory->name());
       }
     } else {
       output()->print_cr("Help unavailable : '%s' : No such command", _cmd.value());
@@ -90,14 +114,6 @@
   }
 }
 
-void HelpDCmd::reset(TRAPS) {
-  _dcmdparser.reset(CHECK);
-}
-
-void HelpDCmd::cleanup() {
-  _dcmdparser.cleanup();
-}
-
 int HelpDCmd::num_arguments() {
   ResourceMark rm;
   HelpDCmd* dcmd = new HelpDCmd(NULL, false);
@@ -109,14 +125,6 @@
   }
 }
 
-GrowableArray<const char*>* HelpDCmd::argument_name_array() {
-  return _dcmdparser.argument_name_array();
-}
-
-GrowableArray<DCmdArgumentInfo*>* HelpDCmd::argument_info_array() {
-  return _dcmdparser.argument_info_array();
-}
-
 void VersionDCmd::execute(TRAPS) {
   output()->print_cr("%s version %s", Abstract_VM_Version::vm_name(),
           Abstract_VM_Version::vm_release());
@@ -129,3 +137,210 @@
             jdk_version.minor_version());
   }
 }
+
+PrintVMFlagsDCmd::PrintVMFlagsDCmd(outputStream* output, bool heap) :
+                                   DCmdWithParser(output, heap),
+  _all("-all", "Print all flags supported by the VM", "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_all);  // make "-all" known to this command's parser as an option
+}
+
+void PrintVMFlagsDCmd::execute(TRAPS) {
+  if (!_all.value()) {  // default: only what printSetFlags() reports
+    CommandLineFlags::printSetFlags(output());
+    return;
+  }
+  CommandLineFlags::printFlags(output(), true);  // -all was given
+}
+
+int PrintVMFlagsDCmd::num_arguments() {
+  // Build a throwaway instance so that its parser can report the argument count.
+  ResourceMark rm;
+  PrintVMFlagsDCmd* probe = new PrintVMFlagsDCmd(NULL, false);
+  if (probe == NULL) {
+    return 0;
+  }
+  DCmdMark guard(probe);
+  return probe->_dcmdparser.num_arguments();
+}
+
+void PrintSystemPropertiesDCmd::execute(TRAPS) {
+  // Resolve sun.misc.VMSupport and make sure it is initialized
+  Symbol* klass = vmSymbols::sun_misc_VMSupport();
+  klassOop k = SystemDictionary::resolve_or_fail(klass, true, CHECK);
+  instanceKlassHandle ik (THREAD, k);
+  if (ik->should_be_initialized()) {
+    ik->initialize(THREAD);
+  }
+  if (HAS_PENDING_EXCEPTION) {  // print the exception on the command output, clear it and give up
+    java_lang_Throwable::print(PENDING_EXCEPTION, output());
+    output()->cr();
+    CLEAR_PENDING_EXCEPTION;
+    return;
+  }
+
+  // invoke the static serializePropertiesToByteArray method (no arguments)
+  JavaValue result(T_OBJECT);
+  JavaCallArguments args;
+
+  Symbol* signature = vmSymbols::serializePropertiesToByteArray_signature();
+  JavaCalls::call_static(&result,
+                         ik,
+                         vmSymbols::serializePropertiesToByteArray_name(),
+                         signature,
+                         &args,
+                         THREAD);
+  if (HAS_PENDING_EXCEPTION) {  // same handling as above: report, clear, return
+    java_lang_Throwable::print(PENDING_EXCEPTION, output());
+    output()->cr();
+    CLEAR_PENDING_EXCEPTION;
+    return;
+  }
+
+  // The result should be a byte array ([B)
+  oop res = (oop)result.get_jobject();
+  assert(res->is_typeArray(), "just checking");
+  assert(typeArrayKlass::cast(res->klass())->element_type() == T_BYTE, "just checking");
+
+  // copy the bytes to the output stream, unmodified
+  typeArrayOop ba = typeArrayOop(res);
+  jbyte* addr = typeArrayOop(res)->byte_at_addr(0);
+  output()->print_raw((const char*)addr, ba->length());
+}
+
+VMUptimeDCmd::VMUptimeDCmd(outputStream* output, bool heap) :
+                           DCmdWithParser(output, heap),
+  _date("-date", "Add a prefix with current date", "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_date);  // make "-date" known to this command's parser as an option
+}
+
+void VMUptimeDCmd::execute(TRAPS) {
+  if (_date.value()) {  // -date: prefix the line with a date stamp followed by ": "
+    output()->date_stamp(true, "", ": ");
+  }
+  output()->time_stamp().update_to(tty->time_stamp().ticks());  // take tty's time stamp as this stream's reference
+  output()->stamp();  // presumably prints the seconds elapsed since that reference - TODO confirm in outputStream
+  output()->print_cr(" s");
+}
+
+int VMUptimeDCmd::num_arguments() {
+  // Build a throwaway instance so that its parser can report the argument count.
+  ResourceMark rm;
+  VMUptimeDCmd* probe = new VMUptimeDCmd(NULL, false);
+  if (probe == NULL) {
+    return 0;
+  }
+  DCmdMark guard(probe);
+  return probe->_dcmdparser.num_arguments();
+}
+
+void SystemGCDCmd::execute(TRAPS) {
+  Universe::heap()->collect(GCCause::_java_lang_system_gc);  // request a collection using the System.gc() cause
+}
+
+void RunFinalizationDCmd::execute(TRAPS) {
+  klassOop k = SystemDictionary::resolve_or_fail(vmSymbols::java_lang_System(),  // resolve java.lang.System
+                                                 true, CHECK);
+  instanceKlassHandle klass(THREAD, k);
+  JavaValue result(T_VOID);
+  JavaCalls::call_static(&result, klass,  // static call with a void signature; CHECK returns if an exception is pending
+                         vmSymbols::run_finalization_name(),
+                         vmSymbols::void_method_signature(), CHECK);
+}
+
+#ifndef SERVICES_KERNEL   // Heap dumping not supported
+HeapDumpDCmd::HeapDumpDCmd(outputStream* output, bool heap) :
+                           DCmdWithParser(output, heap),
+  _filename("filename","Name of the dump file", "STRING",true),
+  _all("-all", "Dump all objects, including unreachable objects",
+       "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_all);  // "-all" is registered as an option
+  _dcmdparser.add_dcmd_argument(&_filename);  // "filename" is registered as an argument
+}
+
+void HeapDumpDCmd::execute(TRAPS) {
+  // Unless -all was given, ask the dumper for a full GC first: that reduces
+  // the unreachable objects in the dump and makes it easier to browse.
+  const bool gc_first = !_all.value();
+  HeapDumper dumper(gc_first);
+  if (dumper.dump(_filename.value()) == 0) {
+    output()->print_cr("Heap dump file created");
+    return;
+  }
+
+  // The dump failed: print the dumper's message when it has one.
+  ResourceMark rm;
+  char* reason = dumper.error_as_C_string();
+  if (reason != NULL) {
+    output()->print_cr("%s", reason);
+  } else {
+    output()->print_cr("Dump failed - reason unknown");
+  }
+}
+
+int HeapDumpDCmd::num_arguments() {
+  // Build a throwaway instance so that its parser can report the argument count.
+  ResourceMark rm;
+  HeapDumpDCmd* probe = new HeapDumpDCmd(NULL, false);
+  if (probe == NULL) {
+    return 0;
+  }
+  DCmdMark guard(probe);
+  return probe->_dcmdparser.num_arguments();
+}
+#endif // SERVICES_KERNEL
+
+ClassHistogramDCmd::ClassHistogramDCmd(outputStream* output, bool heap) :
+                                       DCmdWithParser(output, heap),
+  _all("-all", "Inspect all objects, including unreachable objects",
+       "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_all);  // make "-all" known to this command's parser as an option
+}
+
+void ClassHistogramDCmd::execute(TRAPS) {
+  VM_GC_HeapInspection heapop(output(),  // the inspection writes to this command's output stream
+                              !_all.value() /* request full gc unless -all was given */,
+                              true /* need_prologue */);
+  VMThread::execute(&heapop);  // hand the operation over to the VM thread
+}
+
+int ClassHistogramDCmd::num_arguments() {
+  // Build a throwaway instance so that its parser can report the argument count.
+  ResourceMark rm;
+  ClassHistogramDCmd* probe = new ClassHistogramDCmd(NULL, false);
+  if (probe == NULL) {
+    return 0;
+  }
+  DCmdMark guard(probe);
+  return probe->_dcmdparser.num_arguments();
+}
+
+ThreadDumpDCmd::ThreadDumpDCmd(outputStream* output, bool heap) :
+                               DCmdWithParser(output, heap),
+  _locks("-l", "print java.util.concurrent locks", "BOOLEAN", false, "false") {
+  _dcmdparser.add_dcmd_option(&_locks);  // make "-l" known to this command's parser as an option
+}
+
+void ThreadDumpDCmd::execute(TRAPS) {
+  // Thread stacks; the -l option is passed along to the print operation
+  VM_PrintThreads op1(output(), _locks.value());
+  VMThread::execute(&op1);
+
+  // JNI global handles
+  VM_PrintJNI op2(output());
+  VMThread::execute(&op2);
+
+  // Deadlock detection, run as a third, separate VM operation
+  VM_FindDeadlocks op3(output());
+  VMThread::execute(&op3);
+}
+
+int ThreadDumpDCmd::num_arguments() {
+  // Build a throwaway instance so that its parser can report the argument count.
+  ResourceMark rm;
+  ThreadDumpDCmd* probe = new ThreadDumpDCmd(NULL, false);
+  if (probe == NULL) {
+    return 0;
+  }
+  DCmdMark guard(probe);
+  return probe->_dcmdparser.num_arguments();
+}
--- a/src/share/vm/services/diagnosticCommand.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/diagnosticCommand.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -34,10 +34,10 @@
 #include "services/diagnosticArgument.hpp"
 #include "services/diagnosticCommand.hpp"
 #include "services/diagnosticFramework.hpp"
+#include "services/diagnosticCommand_ext.hpp"
 
-class HelpDCmd : public DCmd {
+class HelpDCmd : public DCmdWithParser {
 protected:
-  DCmdParser _dcmdparser;
   DCmdArgument<bool> _all;
   DCmdArgument<char*> _cmd;
 public:
@@ -50,13 +50,7 @@
   }
   static const char* impact() { return "Low: "; }
   static int num_arguments();
-  virtual void parse(CmdLine* line, char delim, TRAPS);
   virtual void execute(TRAPS);
-  virtual void reset(TRAPS);
-  virtual void cleanup();
-  virtual void print_help(outputStream* out);
-  virtual GrowableArray<const char*>* argument_name_array();
-  virtual GrowableArray<DCmdArgumentInfo*>* argument_info_array();
 };
 
 class VersionDCmd : public DCmd {
@@ -68,9 +62,156 @@
   }
   static const char* impact() { return "Low: "; }
   static int num_arguments() { return 0; }
-  virtual void parse(CmdLine* line, char delim, TRAPS) { }
+  virtual void execute(TRAPS);
+};
+
+class CommandLineDCmd : public DCmd {  // "VM.command_line" diagnostic command; takes no arguments
+public:
+  CommandLineDCmd(outputStream* output, bool heap) : DCmd(output, heap) { }
+  static const char* name() { return "VM.command_line"; }
+  static const char* description() {
+    return "Print the command line used to start this VM instance.";
+  }
+  static const char* impact() { return "Low: "; }
+  static int num_arguments() { return 0; }
+  virtual void execute(TRAPS) {
+    Arguments::print_on(_output);  // the whole command is this one call on the command's output stream
+  }
+};
+
+// See also: get_system_properties in attachListener.cpp
+class PrintSystemPropertiesDCmd : public DCmd {  // "VM.system_properties" diagnostic command; takes no arguments
+public:
+  PrintSystemPropertiesDCmd(outputStream* output, bool heap) : DCmd(output, heap) { }
+    static const char* name() { return "VM.system_properties"; }
+    static const char* description() {
+      return "Print system properties.";
+    }
+    static const char* impact() {
+      return "Low: ";
+    }
+    static int num_arguments() { return 0; }
+    virtual void execute(TRAPS);  // defined out of line
+};
+
+// See also: print_flag in attachListener.cpp
+class PrintVMFlagsDCmd : public DCmdWithParser {  // "VM.flags" diagnostic command
+protected:
+  DCmdArgument<bool> _all;  // backing storage for one boolean command parameter
+public:
+  PrintVMFlagsDCmd(outputStream* output, bool heap);
+  static const char* name() { return "VM.flags"; }
+  static const char* description() {
+    return "Print VM flag options and their current values.";
+  }
+  static const char* impact() {
+    return "Low: ";
+  }
+  static int num_arguments();  // defined out of line
+  virtual void execute(TRAPS);
+};
+
+class VMUptimeDCmd : public DCmdWithParser {
+protected:
+  DCmdArgument<bool> _date;
+public:
+  VMUptimeDCmd(outputStream* output, bool heap);
+  static const char* name() { return "VM.uptime"; }
+  static const char* description() {
+    return "Print VM uptime.";
+  }
+  static const char* impact() {
+    return "Low: ";
+  }
+  static int num_arguments();
   virtual void execute(TRAPS);
-  virtual void print_help(outputStream* out) { }
+};
+
+class SystemGCDCmd : public DCmd {  // "GC.run" diagnostic command; takes no arguments
+public:
+  SystemGCDCmd(outputStream* output, bool heap) : DCmd(output, heap) { }
+    static const char* name() { return "GC.run"; }
+    static const char* description() {
+      return "Call java.lang.System.gc().";
+    }
+    static const char* impact() {
+      return "Medium: Depends on Java heap size and content.";
+    }
+    static int num_arguments() { return 0; }
+    virtual void execute(TRAPS);  // defined out of line
+};
+
+class RunFinalizationDCmd : public DCmd {  // "GC.run_finalization" diagnostic command; takes no arguments
+public:
+  RunFinalizationDCmd(outputStream* output, bool heap) : DCmd(output, heap) { }
+    static const char* name() { return "GC.run_finalization"; }
+    static const char* description() {
+      return "Call java.lang.System.runFinalization().";
+    }
+    static const char* impact() {
+      return "Medium: Depends on Java content.";
+    }
+    static int num_arguments() { return 0; }
+    virtual void execute(TRAPS);  // defined out of line
+};
+
+#ifndef SERVICES_KERNEL   // Heap dumping not supported
+// See also: dump_heap in attachListener.cpp
+class HeapDumpDCmd : public DCmdWithParser {  // "GC.heap_dump" diagnostic command
+protected:
+  DCmdArgument<char*> _filename;  // backing storage for one string command parameter
+  DCmdArgument<bool>  _all;       // backing storage for one boolean command parameter
+public:
+  HeapDumpDCmd(outputStream* output, bool heap);
+  static const char* name() {
+    return "GC.heap_dump";
+  }
+  static const char* description() {
+    return "Generate a HPROF format dump of the Java heap.";
+  }
+  static const char* impact() {
+    return "High: Depends on Java heap size and content. "
+           "Request a full GC unless the '-all' option is specified.";
+  }
+  static int num_arguments();  // defined out of line
+  virtual void execute(TRAPS);
+};
+#endif // SERVICES_KERNEL
+
+// See also: inspectheap in attachListener.cpp
+class ClassHistogramDCmd : public DCmdWithParser {  // "GC.class_histogram" diagnostic command
+protected:
+  DCmdArgument<bool> _all;  // backing storage for one boolean command parameter
+public:
+  ClassHistogramDCmd(outputStream* output, bool heap);
+  static const char* name() {
+    return "GC.class_histogram";
+  }
+  static const char* description() {
+    return "Provide statistics about the Java heap usage.";
+  }
+  static const char* impact() {
+    return "High: Depends on Java heap size and content.";
+  }
+  static int num_arguments();  // defined out of line
+  virtual void execute(TRAPS);
+};
+
+// See also: thread_dump in attachListener.cpp
+class ThreadDumpDCmd : public DCmdWithParser {
+protected:
+  DCmdArgument<bool> _locks;
+public:
+  ThreadDumpDCmd(outputStream* output, bool heap);
+  static const char* name() { return "Thread.print"; }
+  static const char* description() {
+    return "Print all threads with stacktraces.";
+  }
+  static const char* impact() {
+    return "Medium: Depends on the number of threads.";
+  }
+  static int num_arguments();
+  virtual void execute(TRAPS);
 };
 
 #endif // SHARE_VM_SERVICES_DIAGNOSTICCOMMAND_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/services/diagnosticCommand_ext.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. DO
+ * NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_SERVICES_DIAGNOSTICCOMMAND_EXT_HPP
+#define SHARE_VM_SERVICES_DIAGNOSTICCOMMAND_EXT_HPP
+
+#undef HAVE_EXTRA_DCMD
+
+#endif // SHARE_VM_SERVICES_DIAGNOSTICCOMMAND_EXT_HPP
--- a/src/share/vm/services/diagnosticFramework.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/diagnosticFramework.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,7 +61,7 @@
 bool DCmdArgIter::next(TRAPS) {
   if (_len == 0) return false;
   // skipping spaces
-  while (_cursor < _len - 1 && isspace(_buffer[_cursor])) {
+  while (_cursor < _len - 1 && _buffer[_cursor] == _delim) {
     _cursor++;
   }
   // handling end of command line
@@ -226,7 +226,7 @@
 }
 
 void DCmdParser::print_help(outputStream* out, const char* cmd_name) {
-  out->print("\nSyntax : %s %s", cmd_name, _options == NULL ? "" : "[options]");
+  out->print("Syntax : %s %s", cmd_name, _options == NULL ? "" : "[options]");
   GenDCmdArgument* arg = _arguments_list;
   while (arg != NULL) {
     if (arg->is_mandatory()) {
@@ -373,6 +373,30 @@
   }
 }
 
+void DCmdWithParser::parse(CmdLine* line, char delim, TRAPS) {
+  _dcmdparser.parse(line, delim, CHECK);  // delegate to the embedded parser; CHECK returns if an exception is pending
+}
+
+void DCmdWithParser::print_help(const char* name) {
+  _dcmdparser.print_help(output(), name);  // the parser prints the help to this command's output stream
+}
+
+void DCmdWithParser::reset(TRAPS) {
+  _dcmdparser.reset(CHECK);  // delegate to the embedded parser
+}
+
+void DCmdWithParser::cleanup() {
+  _dcmdparser.cleanup();  // delegate to the embedded parser
+}
+
+GrowableArray<const char*>* DCmdWithParser::argument_name_array() {
+  return _dcmdparser.argument_name_array();  // delegate to the embedded parser
+}
+
+GrowableArray<DCmdArgumentInfo*>* DCmdWithParser::argument_info_array() {
+  return _dcmdparser.argument_info_array();  // delegate to the embedded parser
+}
+
 Mutex* DCmdFactory::_dcmdFactory_lock = new Mutex(Mutex::leaf, "DCmdFactory", true);
 
 DCmdFactory* DCmdFactory::factory(const char* name, size_t len) {
--- a/src/share/vm/services/diagnosticFramework.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/diagnosticFramework.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -195,6 +195,7 @@
   DCmdParser() {
     _options = NULL;
     _arguments_list = NULL;
+    _delim = ' ';
   }
   void add_dcmd_option(GenDCmdArgument* arg);
   void add_dcmd_argument(GenDCmdArgument* arg);
@@ -241,8 +242,17 @@
   static int num_arguments() { return 0; }
   outputStream* output() { return _output; }
   bool is_heap_allocated()  { return _is_heap_allocated; }
-  virtual void print_help(outputStream* out) { };
-  virtual void parse(CmdLine* line, char delim, TRAPS) { }
+  virtual void print_help(const char* name) {
+    output()->print_cr("Syntax: %s", name);
+  }
+  virtual void parse(CmdLine* line, char delim, TRAPS) {
+    DCmdArgIter iter(line->args_addr(), line->args_len(), delim);
+    bool has_arg = iter.next(CHECK);
+    if (has_arg) {
+      THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+                "Unknown argument in diagnostic command");
+    }
+  }
   virtual void execute(TRAPS) { }
   virtual void reset(TRAPS) { }
   virtual void cleanup() { }
@@ -262,6 +272,25 @@
                                 char delim, TRAPS);
 };
 
+class DCmdWithParser : public DCmd {  // DCmd base for commands whose arguments are handled by a DCmdParser
+protected:
+  DCmdParser _dcmdparser;  // parser that subclasses register their options and arguments with
+public:
+  DCmdWithParser (outputStream *output, bool heap=false) : DCmd(output, heap) { }
+  static const char* name() { return "No Name";}  // placeholder values; the concrete commands declare their own
+  static const char* description() { return "No Help";}
+  static const char* disabled_message() { return "Diagnostic command currently disabled"; }
+  static const char* impact() { return "Low: No impact"; }
+  static int num_arguments() { return 0; }
+  virtual void parse(CmdLine *line, char delim, TRAPS);
+  virtual void execute(TRAPS) { }  // does nothing here; concrete commands override it
+  virtual void reset(TRAPS);
+  virtual void cleanup();
+  virtual void print_help(const char* name);
+  virtual GrowableArray<const char*>* argument_name_array();
+  virtual GrowableArray<DCmdArgumentInfo*>* argument_info_array();
+};
+
 class DCmdMark : public StackObj {
   DCmd* _ref;
 public:
@@ -359,4 +388,17 @@
   }
 };
 
+// This class provides a convenient way to register Dcmds, without a need to change
+// management.cpp every time. Body of these two methods resides in
+// diagnosticCommand.cpp
+
+class DCmdRegistrant : public AllStatic {  // static-only holder of the command registration entry points
+
+private:
+    static void register_dcmds();      // registers the built-in commands
+    static void register_dcmds_ext();  // extension hook; the default body is empty
+
+    friend class Management;  // both methods are private, so only Management can call them
+};
+
 #endif // SHARE_VM_SERVICES_DIAGNOSTICFRAMEWORK_HPP
--- a/src/share/vm/services/g1MemoryPool.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/g1MemoryPool.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -78,7 +78,7 @@
   G1MemoryPoolSuper(g1h,
                     "G1 Old Gen",
                     g1h->g1mm()->old_space_committed(), /* init_size */
-                    _undefined_max,
+                    g1h->g1mm()->old_gen_max(),
                     true /* support_usage_threshold */) { }
 
 MemoryUsage G1OldGenPool::get_memory_usage() {
--- a/src/share/vm/services/g1MemoryPool.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/g1MemoryPool.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -101,7 +101,7 @@
     return _g1mm->old_space_used();
   }
   size_t max_size() const {
-    return _undefined_max;
+    return _g1mm->old_gen_max();
   }
   MemoryUsage get_memory_usage();
 };
--- a/src/share/vm/services/gcNotifier.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/gcNotifier.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,7 +44,8 @@
   // Make a copy of the last GC statistics
   // GC may occur between now and the creation of the notification
   int num_pools = MemoryService::num_memory_pools();
-  GCStatInfo* stat = new GCStatInfo(num_pools);
+  // stat is deallocated inside GCNotificationRequest
+  GCStatInfo* stat = new(ResourceObj::C_HEAP) GCStatInfo(num_pools);
   mgr->get_last_gc_stat(stat);
   GCNotificationRequest *request = new GCNotificationRequest(os::javaTimeMillis(),mgr,action,cause,stat);
   addRequest(request);
@@ -179,17 +180,43 @@
 }
 
 void GCNotifier::sendNotification(TRAPS) {
+  GCNotifier::sendNotificationInternal(THREAD);
+  // Clearing pending exception to avoid premature termination of
+  // the service thread
+  if (HAS_PENDING_EXCEPTION) {
+    CLEAR_PENDING_EXCEPTION;
+  }
+}
+
+class NotificationMark : public StackObj {
+  // This class is used in GCNotifier::sendNotificationInternal to ensure that
+  // the GCNotificationRequest object is properly cleaned up, whatever path
+  // is used to exit the method.
+  GCNotificationRequest* _request;
+public:
+  NotificationMark(GCNotificationRequest* r) {
+    _request = r;
+  }
+  ~NotificationMark() {
+    assert(_request != NULL, "Sanity check");
+    delete _request;
+  }
+};
+
+void GCNotifier::sendNotificationInternal(TRAPS) {
   ResourceMark rm(THREAD);
+  HandleMark hm(THREAD);
   GCNotificationRequest *request = getRequest();
-  if(request != NULL) {
-    Handle objGcInfo = createGcInfo(request->gcManager,request->gcStatInfo,THREAD);
+  if (request != NULL) {
+    NotificationMark nm(request);
+    Handle objGcInfo = createGcInfo(request->gcManager, request->gcStatInfo, THREAD);
 
     Handle objName = java_lang_String::create_from_platform_dependent_str(request->gcManager->name(), CHECK);
     Handle objAction = java_lang_String::create_from_platform_dependent_str(request->gcAction, CHECK);
     Handle objCause = java_lang_String::create_from_platform_dependent_str(request->gcCause, CHECK);
 
     klassOop k = Management::sun_management_GarbageCollectorImpl_klass(CHECK);
-    instanceKlassHandle gc_mbean_klass (THREAD, k);
+    instanceKlassHandle gc_mbean_klass(THREAD, k);
 
     instanceOop gc_mbean = request->gcManager->get_memory_manager_instance(THREAD);
     instanceHandle gc_mbean_h(THREAD, gc_mbean);
@@ -212,11 +239,6 @@
                             vmSymbols::createGCNotification_signature(),
                             &args,
                             CHECK);
-    if (HAS_PENDING_EXCEPTION) {
-      CLEAR_PENDING_EXCEPTION;
-    }
-
-    delete request;
   }
 }
 
--- a/src/share/vm/services/gcNotifier.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/gcNotifier.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -60,6 +60,7 @@
   static GCNotificationRequest *last_request;
   static void addRequest(GCNotificationRequest *request);
   static GCNotificationRequest *getRequest();
+  static void sendNotificationInternal(TRAPS);
 public:
   static void pushNotification(GCMemoryManager *manager, const char *action, const char *cause);
   static bool has_event();
--- a/src/share/vm/services/management.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/management.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -33,6 +33,7 @@
 #include "oops/objArrayKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/arguments.hpp"
+#include "runtime/globals.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/javaCalls.hpp"
@@ -117,8 +118,9 @@
 #endif // SERVICES_KERNEL
   _optional_support.isThreadAllocatedMemorySupported = 1;
 
-  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<HelpDCmd>(true, false));
-  DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<VersionDCmd>(true, false));
+  // Registration of the diagnostic commands
+  DCmdRegistrant::register_dcmds();
+  DCmdRegistrant::register_dcmds_ext();
 }
 
 void Management::initialize(TRAPS) {
@@ -2032,15 +2034,15 @@
   // Make a copy of the last GC statistics
   // GC may occur while constructing the last GC information
   int num_pools = MemoryService::num_memory_pools();
-  GCStatInfo* stat = new GCStatInfo(num_pools);
-  if (mgr->get_last_gc_stat(stat) == 0) {
+  GCStatInfo stat(num_pools);
+  if (mgr->get_last_gc_stat(&stat) == 0) {
     gc_stat->gc_index = 0;
     return;
   }
 
-  gc_stat->gc_index = stat->gc_index();
-  gc_stat->start_time = Management::ticks_to_ms(stat->start_time());
-  gc_stat->end_time = Management::ticks_to_ms(stat->end_time());
+  gc_stat->gc_index = stat.gc_index();
+  gc_stat->start_time = Management::ticks_to_ms(stat.start_time());
+  gc_stat->end_time = Management::ticks_to_ms(stat.end_time());
 
   // Current implementation does not have GC extension attributes
   gc_stat->num_gc_ext_attributes = 0;
@@ -2058,17 +2060,17 @@
   objArrayHandle usage_after_gc_ah(THREAD, au);
 
   for (int i = 0; i < num_pools; i++) {
-    Handle before_usage = MemoryService::create_MemoryUsage_obj(stat->before_gc_usage_for_pool(i), CHECK);
+    Handle before_usage = MemoryService::create_MemoryUsage_obj(stat.before_gc_usage_for_pool(i), CHECK);
     Handle after_usage;
 
-    MemoryUsage u = stat->after_gc_usage_for_pool(i);
+    MemoryUsage u = stat.after_gc_usage_for_pool(i);
     if (u.max_size() == 0 && u.used() > 0) {
       // If max size == 0, this pool is a survivor space.
       // Set max size = -1 since the pools will be swapped after GC.
       MemoryUsage usage(u.init_size(), u.used(), u.committed(), (size_t)-1);
       after_usage = MemoryService::create_MemoryUsage_obj(usage, CHECK);
     } else {
-      after_usage = MemoryService::create_MemoryUsage_obj(stat->after_gc_usage_for_pool(i), CHECK);
+      after_usage = MemoryService::create_MemoryUsage_obj(stat.after_gc_usage_for_pool(i), CHECK);
     }
     usage_before_gc_ah->obj_at_put(i, before_usage());
     usage_after_gc_ah->obj_at_put(i, after_usage());
--- a/src/share/vm/services/management.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/management.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -76,6 +76,9 @@
     _stamp.update();
   }
 
+  static jlong begin_vm_creation_time() {
+    return _begin_vm_creation_time->get_value();
+  }
   static jlong vm_init_done_time() {
     return _vm_init_done_time->get_value();
   }
--- a/src/share/vm/services/memoryManager.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/memoryManager.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -214,8 +214,8 @@
 
 void GCMemoryManager::initialize_gc_stat_info() {
   assert(MemoryService::num_memory_pools() > 0, "should have one or more memory pools");
-  _last_gc_stat = new GCStatInfo(MemoryService::num_memory_pools());
-  _current_gc_stat = new GCStatInfo(MemoryService::num_memory_pools());
+  _last_gc_stat = new(ResourceObj::C_HEAP) GCStatInfo(MemoryService::num_memory_pools());
+  _current_gc_stat = new(ResourceObj::C_HEAP) GCStatInfo(MemoryService::num_memory_pools());
   // tracking concurrent collections we need two objects: one to update, and one to
   // hold the publicly available "last (completed) gc" information.
 }
--- a/src/share/vm/services/memoryManager.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/memoryManager.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -108,7 +108,7 @@
   const char* name()         { return "CodeCacheManager"; }
 };
 
-class GCStatInfo : public CHeapObj {
+class GCStatInfo : public ResourceObj {
 private:
   size_t _index;
   jlong  _start_time;
--- a/src/share/vm/services/threadService.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/services/threadService.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -377,7 +377,7 @@
     }
 
   }
-
+  delete cycle;
   return deadlocks;
 }
 
--- a/src/share/vm/shark/sharkIntrinsics.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/shark/sharkIntrinsics.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -213,17 +213,11 @@
     SharkType::oop_type(),
     "klass");
 
-  Value *klass_part = builder()->CreateAddressOfStructEntry(
-    klass,
-    in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()),
-    SharkType::klass_type(),
-    "klass_part");
-
   state()->push(
     SharkValue::create_jobject(
       builder()->CreateValueOfStructEntry(
-        klass_part,
-        in_ByteSize(Klass::java_mirror_offset_in_bytes()),
+        klass,
+        Klass::java_mirror_offset(),
         SharkType::oop_type(),
         "java_mirror"),
       true));
--- a/src/share/vm/shark/sharkTopLevelBlock.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/shark/sharkTopLevelBlock.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -745,15 +745,9 @@
     SharkType::oop_type(),
     "klass");
 
-  Value *klass_part = builder()->CreateAddressOfStructEntry(
+  Value *access_flags = builder()->CreateValueOfStructEntry(
     klass,
-    in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()),
-    SharkType::klass_type(),
-    "klass_part");
-
-  Value *access_flags = builder()->CreateValueOfStructEntry(
-    klass_part,
-    in_ByteSize(Klass::access_flags_offset_in_bytes()),
+    Klass::access_flags_offset(),
     SharkType::jint_type(),
     "access_flags");
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/trace/traceEventTypes.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_TRACE_TRACE_EVENT_TYPES_HPP
+#define SHARE_VM_TRACE_TRACE_EVENT_TYPES_HPP
+
+/* Empty, just a placeholder for tracing events */
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/trace/traceMacros.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_TRACE_TRACE_MACRO_HPP
+#define SHARE_VM_TRACE_TRACE_MACRO_HPP
+
+#define EVENT_BEGIN(type, name)
+#define EVENT_SET(name, field, value)
+#define EVENT_COMMIT(name, ...)
+#define EVENT_STARTED(name, time)
+#define EVENT_ENDED(name, time)
+#define EVENT_THREAD_EXIT(thread)
+
+#define TRACE_ENABLED 0
+
+#define TRACE_INIT_ID(k)
+#define TRACE_BUFFER void*
+
+#define TRACE_START() true
+#define TRACE_INITIALIZE() 0
+
+#define TRACE_SET_KLASS_TRACE_ID(x1, x2) do { } while (0)
+#define TRACE_DEFINE_KLASS_METHODS typedef int ___IGNORED_hs_trace_type1
+#define TRACE_DEFINE_KLASS_TRACE_ID typedef int ___IGNORED_hs_trace_type2
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/trace/tracing.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_TRACE_TRACING_HPP
+#define SHARE_VM_TRACE_TRACING_HPP
+
+#include "trace/traceMacros.hpp"
+
+#endif
--- a/src/share/vm/utilities/bitMap.inline.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/bitMap.inline.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -178,8 +178,30 @@
     for (; !(res & 1); res_offset++) {
       res = res >> 1;
     }
-    assert(res_offset >= l_offset &&
-           res_offset < r_offset, "just checking");
+
+#ifdef ASSERT
+    // In the following assert, if r_offset is not bitmap word aligned,
+    // checking that res_offset is strictly less than r_offset is too
+    // strong and will trip the assert.
+    //
+    // Consider the case where l_offset is bit 15 and r_offset is bit 17
+    // of the same map word, and where bits [15:16:17:18] == [00:00:00:01].
+    // All the bits in the range [l_offset:r_offset) are 0.
+    // The loop that calculates res_offset, above, would yield the offset
+    // of bit 18 because it's in the same map word as l_offset and there
+    // is a set bit in that map word above l_offset (i.e. res != NoBits).
+    //
+    // In this case, however, we can assert that res_offset is strictly
+    // less than size() since we know that there is at least one set bit
+    // at an offset above, but in the same map word as, r_offset.
+    // Otherwise, if r_offset is word aligned then it will not be in the
+    // same map word as l_offset (unless it equals l_offset). So either
+    // there won't be a set bit between l_offset and the end of its map
+    // word (i.e. res == NoBits), or res_offset will be less than r_offset.
+
+    idx_t limit = is_word_aligned(r_offset) ? r_offset : size();
+    assert(res_offset >= l_offset && res_offset < limit, "just checking");
+#endif // ASSERT
     return MIN2(res_offset, r_offset);
   }
   // skip over all word length 0-bit runs
--- a/src/share/vm/utilities/debug.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/debug.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -600,19 +600,11 @@
   tty->flush();
 }
 
-
 extern "C" void events() {
   Command c("events");
-  Events::print_last(tty, 50);
+  Events::print();
 }
 
-
-extern "C" void nevents(int n) {
-  Command c("events");
-  Events::print_last(tty, n);
-}
-
-
 // Given a heap address that was valid before the most recent GC, if
 // the oop that used to contain it is still live, prints the new
 // location of the oop and the address. Useful for tracking down
@@ -771,7 +763,7 @@
 
   tty->print_cr("misc.");
   tty->print_cr("  flush()       - flushes the log file");
-  tty->print_cr("  events()      - dump last 50 events");
+  tty->print_cr("  events()      - dump events from ring buffers");
 
 
   tty->print_cr("compiler debugging");
--- a/src/share/vm/utilities/debug.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/debug.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,16 +33,23 @@
 // Simple class to format the ctor arguments into a fixed-sized buffer.
 template <size_t bufsz = 256>
 class FormatBuffer {
-public:
+ public:
   inline FormatBuffer(const char * format, ...);
   inline void append(const char* format, ...);
+  inline void print(const char* format, ...);
+  inline void printv(const char* format, va_list ap);
   operator const char *() const { return _buf; }
 
-private:
+  char* buffer() { return _buf; }
+  int size() { return bufsz; }
+
+ private:
   FormatBuffer(const FormatBuffer &); // prevent copies
 
-private:
+ protected:
   char _buf[bufsz];
+
+  inline FormatBuffer();
 };
 
 template <size_t bufsz>
@@ -54,6 +61,24 @@
 }
 
 template <size_t bufsz>
+FormatBuffer<bufsz>::FormatBuffer() {
+  _buf[0] = '\0';
+}
+
+template <size_t bufsz>
+void FormatBuffer<bufsz>::print(const char * format, ...) {
+  va_list argp;
+  va_start(argp, format);
+  jio_vsnprintf(_buf, bufsz, format, argp);
+  va_end(argp);
+}
+
+template <size_t bufsz>
+void FormatBuffer<bufsz>::printv(const char * format, va_list argp) {
+  jio_vsnprintf(_buf, bufsz, format, argp);
+}
+
+template <size_t bufsz>
 void FormatBuffer<bufsz>::append(const char* format, ...) {
   // Given that the constructor does a vsnprintf we can assume that
   // _buf is already initialized.
--- a/src/share/vm/utilities/decoder.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/decoder.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -24,80 +24,107 @@
 
 #include "precompiled.hpp"
 #include "prims/jvm.h"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/os.hpp"
 #include "utilities/decoder.hpp"
+#include "utilities/vmError.hpp"
 
-Decoder::decoder_status  Decoder::_decoder_status = Decoder::no_error;
-bool                     Decoder::_initialized = false;
-
-#if !defined(_WINDOWS) && !defined(__APPLE__)
+#if defined(_WINDOWS)
+  #include "decoder_windows.hpp"
+#elif defined(__APPLE__)
+  #include "decoder_machO.hpp"
+#else
+  #include "decoder_elf.hpp"
+#endif
 
-// Implementation of common functionalities among Solaris and Linux
-#include "utilities/elfFile.hpp"
+AbstractDecoder*  Decoder::_shared_decoder = NULL;
+AbstractDecoder*  Decoder::_error_handler_decoder = NULL;
+NullDecoder       Decoder::_do_nothing_decoder;
+Mutex*            Decoder::_shared_decoder_lock = new Mutex(Mutex::native,
+                                "SharedDecoderLock");
+
+AbstractDecoder* Decoder::get_shared_instance() {
+  assert(_shared_decoder_lock != NULL && _shared_decoder_lock->owned_by_self(),
+    "Require DecoderLock to enter");
 
-ElfFile* Decoder::_opened_elf_files = NULL;
+  if (_shared_decoder == NULL) {
+    _shared_decoder = create_decoder();
+  }
+  return _shared_decoder;
+}
 
-bool Decoder::can_decode_C_frame_in_vm() {
-  return true;
+AbstractDecoder* Decoder::get_error_handler_instance() {
+  if (_error_handler_decoder == NULL) {
+    _error_handler_decoder = create_decoder();
+  }
+  return _error_handler_decoder;
 }
 
-void Decoder::initialize() {
-  _initialized = true;
+
+AbstractDecoder* Decoder::create_decoder() {
+  AbstractDecoder* decoder;
+#if defined(_WINDOWS)
+  decoder = new (std::nothrow) WindowsDecoder();
+#elif defined (__APPLE__)
+  decoder = new (std::nothrow)MachODecoder();
+#else
+  decoder = new (std::nothrow)ElfDecoder();
+#endif
+
+  if (decoder == NULL || decoder->has_error()) {
+    if (decoder != NULL) {
+      delete decoder;
+    }
+    decoder = &_do_nothing_decoder;
+  }
+  return decoder;
+}
+
+bool Decoder::decode(address addr, char* buf, int buflen, int* offset, const char* modulepath) {
+  assert(_shared_decoder_lock != NULL, "Just check");
+  bool error_handling_thread = os::current_thread_id() == VMError::first_error_tid;
+  MutexLockerEx locker(error_handling_thread ? NULL : _shared_decoder_lock, true);
+  AbstractDecoder* decoder = error_handling_thread ?
+    get_error_handler_instance(): get_shared_instance();
+  assert(decoder != NULL, "null decoder");
+
+  return decoder->decode(addr, buf, buflen, offset, modulepath);
 }
 
-void Decoder::uninitialize() {
-  if (_opened_elf_files != NULL) {
-    delete _opened_elf_files;
-    _opened_elf_files = NULL;
-  }
-  _initialized = false;
+bool Decoder::demangle(const char* symbol, char* buf, int buflen) {
+  assert(_shared_decoder_lock != NULL, "Just check");
+  bool error_handling_thread = os::current_thread_id() == VMError::first_error_tid;
+  MutexLockerEx locker(error_handling_thread ? NULL : _shared_decoder_lock, true);
+  AbstractDecoder* decoder = error_handling_thread ?
+    get_error_handler_instance(): get_shared_instance();
+  assert(decoder != NULL, "null decoder");
+  return decoder->demangle(symbol, buf, buflen);
 }
 
-Decoder::decoder_status Decoder::decode(address addr, const char* filepath, char *buf, int buflen, int *offset) {
-  if (_decoder_status != no_error) {
-    return _decoder_status;
-  }
+bool Decoder::can_decode_C_frame_in_vm() {
+  assert(_shared_decoder_lock != NULL, "Just check");
+  bool error_handling_thread = os::current_thread_id() == VMError::first_error_tid;
+  MutexLockerEx locker(error_handling_thread ? NULL : _shared_decoder_lock, true);
+  AbstractDecoder* decoder = error_handling_thread ?
+    get_error_handler_instance(): get_shared_instance();
+  assert(decoder != NULL, "null decoder");
+  return decoder->can_decode_C_frame_in_vm();
+}
 
-  ElfFile* file = get_elf_file(filepath);
-  if (_decoder_status != no_error) {
-    return _decoder_status;
+/*
+ * Shutdown shared decoder and replace it with
+ * _do_nothing_decoder. Do nothing with error handler
+ * instance, since the JVM is going down.
+ */
+void Decoder::shutdown() {
+  assert(_shared_decoder_lock != NULL, "Just check");
+  MutexLockerEx locker(_shared_decoder_lock, true);
+
+  if (_shared_decoder != NULL &&
+    _shared_decoder != &_do_nothing_decoder) {
+    delete _shared_decoder;
   }
 
-  const char* symbol = file->decode(addr, offset);
-  if (file->get_status() == out_of_memory) {
-    _decoder_status = out_of_memory;
-    return _decoder_status;
-  } else if (symbol != NULL) {
-    if (!demangle(symbol, buf, buflen)) {
-      jio_snprintf(buf, buflen, "%s", symbol);
-    }
-    return no_error;
-  } else {
-    return symbol_not_found;
-  }
+  _shared_decoder = &_do_nothing_decoder;
 }
 
-ElfFile* Decoder::get_elf_file(const char* filepath) {
-  if (_decoder_status != no_error) {
-    return NULL;
-  }
-  ElfFile* file = _opened_elf_files;
-  while (file != NULL) {
-    if (file->same_elf_file(filepath)) {
-      return file;
-    }
-    file = file->m_next;
-  }
-
-  file = new ElfFile(filepath);
-  if (file == NULL) {
-    _decoder_status = out_of_memory;
-  }
-  if (_opened_elf_files != NULL) {
-    file->m_next = _opened_elf_files;
-  }
-
-  _opened_elf_files = file;
-  return file;
-}
-
-#endif
--- a/src/share/vm/utilities/decoder.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/decoder.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,83 +23,100 @@
  */
 
 
-#ifndef __DECODER_HPP
-#define __DECODER_HPP
+#ifndef SHARE_VM_UTILITIES_DECODER_HPP
+#define SHARE_VM_UTILITIES_DECODER_HPP
 
 #include "memory/allocation.hpp"
-
-#ifdef _WINDOWS
-#include <windows.h>
-#include <imagehlp.h>
-
-// functions needed for decoding symbols
-typedef DWORD (WINAPI *pfn_SymSetOptions)(DWORD);
-typedef BOOL  (WINAPI *pfn_SymInitialize)(HANDLE, PCTSTR, BOOL);
-typedef BOOL  (WINAPI *pfn_SymGetSymFromAddr64)(HANDLE, DWORD64, PDWORD64, PIMAGEHLP_SYMBOL64);
-typedef DWORD (WINAPI *pfn_UndecorateSymbolName)(const char*, char*, DWORD, DWORD);
+#include "runtime/mutex.hpp"
 
-#elif defined(__APPLE__)
-
-#else
-
-class ElfFile;
-
-#endif // _WINDOWS
-
-
-class Decoder: public StackObj {
-
- public:
+class AbstractDecoder : public CHeapObj {
+public:
   // status code for decoding native C frame
   enum decoder_status {
-         no_error,             // successfully decoded frames
+         not_available = -10,  // real decoder is not available
+         no_error = 0,         // successfully decoded frames
          out_of_memory,        // out of memory
          file_invalid,         // invalid elf file
          file_not_found,       // could not found symbol file (on windows), such as jvm.pdb or jvm.map
          helper_not_found,     // could not load dbghelp.dll (Windows only)
          helper_func_error,    // decoding functions not found (Windows only)
-         helper_init_error,    // SymInitialize failed (Windows only)
-         symbol_not_found      // could not find the symbol
+         helper_init_error     // SymInitialize failed (Windows only)
   };
 
- public:
-  Decoder() { initialize(); };
-  ~Decoder() { uninitialize(); };
+  // decode a pc address to the corresponding function name and an offset from the beginning of
+  // the function
+  virtual bool decode(address pc, char* buf, int buflen, int* offset,
+    const char* modulepath = NULL) = 0;
+  // demangle a C++ symbol
+  virtual bool demangle(const char* symbol, char* buf, int buflen) = 0;
+  // if the decoder can decode symbols in vm
+  virtual bool can_decode_C_frame_in_vm() const = 0;
+
+  virtual decoder_status status() const {
+    return _decoder_status;
+  }
+
+  virtual bool has_error() const {
+    return is_error(_decoder_status);
+  }
+
+  static bool is_error(decoder_status status) {
+    return (status > 0);
+  }
+
+protected:
+  decoder_status  _decoder_status;
+};
 
+// Do nothing decoder
+class NullDecoder : public AbstractDecoder {
+public:
+  NullDecoder() {
+    _decoder_status = not_available;
+  }
+
+  ~NullDecoder() {};
+
+  virtual bool decode(address pc, char* buf, int buflen, int* offset,
+    const char* modulepath = NULL) {
+    return false;
+  }
+
+  virtual bool demangle(const char* symbol, char* buf, int buflen) {
+    return false;
+  }
+
+  virtual bool can_decode_C_frame_in_vm() const {
+    return false;
+  }
+};
+
+
+class Decoder : AllStatic {
+public:
+  static bool decode(address pc, char* buf, int buflen, int* offset, const char* modulepath = NULL);
+  static bool demangle(const char* symbol, char* buf, int buflen);
   static bool can_decode_C_frame_in_vm();
 
-  static void initialize();
-  static void uninitialize();
-
-#ifdef _WINDOWS
-  static decoder_status    decode(address addr, char *buf, int buflen, int *offset);
-#else
-  static decoder_status    decode(address addr, const char* filepath, char *buf, int buflen, int *offset);
-#endif
-
-  static bool              demangle(const char* symbol, char *buf, int buflen);
-
-  static decoder_status    get_status() { return _decoder_status; };
+  // shutdown shared instance
+  static void shutdown();
+protected:
+  // shared decoder instance, _shared_decoder_lock is needed
+  static AbstractDecoder* get_shared_instance();
+  // a private instance for error handler. Error handler can be
+  // triggered almost everywhere, including signal handler, where
+  // no lock can be taken. So the shared decoder can not be used
+  // in this scenario.
+  static AbstractDecoder* get_error_handler_instance();
 
-#if !defined(_WINDOWS) && !defined(__APPLE__)
- private:
-  static ElfFile*         get_elf_file(const char* filepath);
-#endif // _WINDOWS
-
-
- private:
-  static decoder_status     _decoder_status;
-  static bool               _initialized;
+  static AbstractDecoder* create_decoder();
+private:
+  static AbstractDecoder*     _shared_decoder;
+  static AbstractDecoder*     _error_handler_decoder;
+  static NullDecoder          _do_nothing_decoder;
 
-#ifdef _WINDOWS
-  static HMODULE                   _dbghelp_handle;
-  static bool                      _can_decode_in_vm;
-  static pfn_SymGetSymFromAddr64   _pfnSymGetSymFromAddr64;
-  static pfn_UndecorateSymbolName  _pfnUndecorateSymbolName;
-#elif __APPLE__
-#else
-  static ElfFile*                  _opened_elf_files;
-#endif // _WINDOWS
+protected:
+  static Mutex*               _shared_decoder_lock;
 };
 
-#endif // __DECODER_HPP
+#endif // SHARE_VM_UTILITIES_DECODER_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/utilities/decoder_elf.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#if !defined(_WINDOWS) && !defined(__APPLE__)
+#include "decoder_elf.hpp"
+
+ElfDecoder::~ElfDecoder() {
+  if (_opened_elf_files != NULL) {
+    delete _opened_elf_files;
+    _opened_elf_files = NULL;
+  }
+}
+
+bool ElfDecoder::decode(address addr, char *buf, int buflen, int* offset, const char* filepath) {
+  assert(filepath, "null file path");
+  assert(buf != NULL && buflen > 0, "Invalid buffer");
+  if (has_error()) return false;
+  ElfFile* file = get_elf_file(filepath);
+  if (file == NULL) {
+    return false;
+  }
+
+  if (!file->decode(addr, buf, buflen, offset)) {
+    return false;
+  }
+  if (buf[0] != '\0') {
+    demangle(buf, buf, buflen);
+  }
+  return true;
+}
+
+ElfFile* ElfDecoder::get_elf_file(const char* filepath) {
+  ElfFile* file;
+
+  file = _opened_elf_files;
+  while (file != NULL) {
+    if (file->same_elf_file(filepath)) {
+      return file;
+    }
+    file = file->next();
+  }
+
+  file = new (std::nothrow)ElfFile(filepath);
+  if (file != NULL) {
+    if (_opened_elf_files != NULL) {
+      file->set_next(_opened_elf_files);
+    }
+    _opened_elf_files = file;
+  }
+
+  return file;
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/utilities/decoder_elf.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_UTILITIES_DECODER_ELF_HPP
+#define SHARE_VM_UTILITIES_DECODER_ELF_HPP
+
+#if !defined(_WINDOWS) && !defined(__APPLE__)
+
+#include "utilities/decoder.hpp"
+#include "utilities/elfFile.hpp"
+
+// AbstractDecoder implementation backed by the ElfFile objects it has opened.
+class ElfDecoder : public AbstractDecoder {
+public:
+  ElfDecoder() {
+    _opened_elf_files = NULL;
+    _decoder_status = no_error;
+  }
+  ~ElfDecoder();
+
+  bool can_decode_C_frame_in_vm() const { return true; }
+  bool demangle(const char* symbol, char *buf, int buflen);
+  // NOTE(review): filepath defaults to NULL, yet decode() asserts non-NULL.
+  bool decode(address addr, char *buf, int buflen, int* offset, const char* filepath = NULL);
+
+private:
+  ElfFile*         get_elf_file(const char* filepath);
+private:
+  // Head of the list of ElfFile objects created by get_elf_file().
+  ElfFile*         _opened_elf_files;
+};
+
+#endif
+#endif // SHARE_VM_UTILITIES_DECODER_ELF_HPP
--- a/src/share/vm/utilities/elfFile.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/elfFile.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -44,7 +44,7 @@
   m_string_tables = NULL;
   m_symbol_tables = NULL;
   m_next = NULL;
-  m_status = Decoder::no_error;
+  m_status = NullDecoder::no_error;
 
   int len = strlen(filepath) + 1;
   m_filepath = (const char*)os::malloc(len * sizeof(char));
@@ -54,10 +54,10 @@
     if (m_file != NULL) {
       load_tables();
     } else {
-      m_status = Decoder::file_not_found;
+      m_status = NullDecoder::file_not_found;
     }
   } else {
-    m_status = Decoder::out_of_memory;
+    m_status = NullDecoder::out_of_memory;
   }
 }
 
@@ -96,41 +96,41 @@
 
 bool ElfFile::load_tables() {
   assert(m_file, "file not open");
-  assert(m_status == Decoder::no_error, "already in error");
+  assert(!NullDecoder::is_error(m_status), "already in error");
 
   // read elf file header
   if (fread(&m_elfHdr, sizeof(m_elfHdr), 1, m_file) != 1) {
-    m_status = Decoder::file_invalid;
+    m_status = NullDecoder::file_invalid;
     return false;
   }
 
   if (!is_elf_file(m_elfHdr)) {
-    m_status = Decoder::file_invalid;
+    m_status = NullDecoder::file_invalid;
     return false;
   }
 
   // walk elf file's section headers, and load string tables
   Elf_Shdr shdr;
   if (!fseek(m_file, m_elfHdr.e_shoff, SEEK_SET)) {
-    if (m_status != Decoder::no_error) return false;
+    if (NullDecoder::is_error(m_status)) return false;
 
     for (int index = 0; index < m_elfHdr.e_shnum; index ++) {
       if (fread((void*)&shdr, sizeof(Elf_Shdr), 1, m_file) != 1) {
-        m_status = Decoder::file_invalid;
+        m_status = NullDecoder::file_invalid;
         return false;
       }
       // string table
       if (shdr.sh_type == SHT_STRTAB) {
         ElfStringTable* table = new (std::nothrow) ElfStringTable(m_file, shdr, index);
         if (table == NULL) {
-          m_status = Decoder::out_of_memory;
+          m_status = NullDecoder::out_of_memory;
           return false;
         }
         add_string_table(table);
       } else if (shdr.sh_type == SHT_SYMTAB || shdr.sh_type == SHT_DYNSYM) {
         ElfSymbolTable* table = new (std::nothrow) ElfSymbolTable(m_file, shdr);
         if (table == NULL) {
-          m_status = Decoder::out_of_memory;
+          m_status = NullDecoder::out_of_memory;
           return false;
         }
         add_symbol_table(table);
@@ -140,32 +140,33 @@
   return true;
 }
 
-const char* ElfFile::decode(address addr, int* offset) {
+bool ElfFile::decode(address addr, char* buf, int buflen, int* offset) {
   // something already went wrong, just give up
-  if (m_status != Decoder::no_error) {
-    return NULL;
+  if (NullDecoder::is_error(m_status)) {
+    return false;
   }
-
   ElfSymbolTable* symbol_table = m_symbol_tables;
   int string_table_index;
   int pos_in_string_table;
   int off = INT_MAX;
   bool found_symbol = false;
   while (symbol_table != NULL) {
-    if (Decoder::no_error == symbol_table->lookup(addr, &string_table_index, &pos_in_string_table, &off)) {
+    if (symbol_table->lookup(addr, &string_table_index, &pos_in_string_table, &off)) {
       found_symbol = true;
     }
     symbol_table = symbol_table->m_next;
   }
-  if (!found_symbol) return NULL;
+  if (!found_symbol) return false;
 
   ElfStringTable* string_table = get_string_table(string_table_index);
+
   if (string_table == NULL) {
-    m_status = Decoder::file_invalid;
-    return NULL;
+    m_status = NullDecoder::file_invalid;
+    return false;
   }
   if (offset) *offset = off;
-  return string_table->string_at(pos_in_string_table);
+
+  return string_table->string_at(pos_in_string_table, buf, buflen);
 }
 
 
--- a/src/share/vm/utilities/elfFile.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/elfFile.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef __ELF_FILE_HPP
-#define __ELF_FILE_HPP
+#ifndef SHARE_VM_UTILITIES_ELF_FILE_HPP
+#define SHARE_VM_UTILITIES_ELF_FILE_HPP
 
 #if !defined(_WINDOWS) && !defined(__APPLE__)
 
@@ -83,12 +83,12 @@
 // part of code to be very defensive, and bait out if anything went wrong.
 
 class ElfFile: public CHeapObj {
-  friend class Decoder;
+  friend class ElfDecoder;
  public:
   ElfFile(const char* filepath);
   ~ElfFile();
 
-  const char* decode(address addr, int* offset);
+  bool decode(address addr, char* buf, int buflen, int* offset);
   const char* filepath() {
     return m_filepath;
   }
@@ -99,7 +99,7 @@
     return (m_filepath && !strcmp(filepath, m_filepath));
   }
 
-  Decoder::decoder_status get_status() {
+  NullDecoder::decoder_status get_status() {
     return m_status;
   }
 
@@ -119,8 +119,9 @@
   // return a string table at specified section index
   ElfStringTable* get_string_table(int index);
 
-  // look up an address and return the nearest symbol
-  const char* look_up(Elf_Shdr shdr, address addr, int* offset);
+protected:
+   ElfFile*  next() const { return m_next; }
+   void set_next(ElfFile* file) { m_next = file; }
 
  protected:
     ElfFile*         m_next;
@@ -131,17 +132,17 @@
   FILE* m_file;
 
   // Elf header
-  Elf_Ehdr            m_elfHdr;
+  Elf_Ehdr                     m_elfHdr;
 
   // symbol tables
-  ElfSymbolTable*     m_symbol_tables;
+  ElfSymbolTable*              m_symbol_tables;
 
   // string tables
-  ElfStringTable*     m_string_tables;
+  ElfStringTable*              m_string_tables;
 
-  Decoder::decoder_status  m_status;
+  NullDecoder::decoder_status  m_status;
 };
 
 #endif // _WINDOWS
 
-#endif // __ELF_FILE_HPP
+#endif // SHARE_VM_UTILITIES_ELF_FILE_HPP
--- a/src/share/vm/utilities/elfStringTable.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/elfStringTable.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -38,7 +38,7 @@
   m_index = index;
   m_next = NULL;
   m_file = file;
-  m_status = Decoder::no_error;
+  m_status = NullDecoder::no_error;
 
   // try to load the string table
   long cur_offset = ftell(file);
@@ -48,7 +48,7 @@
     if (fseek(file, shdr.sh_offset, SEEK_SET) ||
       fread((void*)m_table, shdr.sh_size, 1, file) != 1 ||
       fseek(file, cur_offset, SEEK_SET)) {
-      m_status = Decoder::file_invalid;
+      m_status = NullDecoder::file_invalid;
       os::free((void*)m_table);
       m_table = NULL;
     }
@@ -67,22 +67,29 @@
   }
 }
 
-const char* ElfStringTable::string_at(int pos) {
-  if (m_status != Decoder::no_error) {
-    return NULL;
+// Copies the string at offset 'pos' of this table into 'buf' (capacity
+// buflen, assumed to be positive).  Returns false when the table is already
+// in error or the string cannot be read from the file.
+bool ElfStringTable::string_at(int pos, char* buf, int buflen) {
+  if (NullDecoder::is_error(m_status)) {
+    return false;
   }
   if (m_table != NULL) {
-    return (const char*)(m_table + pos);
+    jio_snprintf(buf, buflen, "%s", (const char*)(m_table + pos));
+    return true;
   } else {
     long cur_pos = ftell(m_file);
     if (cur_pos == -1 ||
       fseek(m_file, m_shdr.sh_offset + pos, SEEK_SET) ||
-      fread(m_symbol, 1, MAX_SYMBOL_LEN, m_file) <= 0 ||
+      fread(buf, 1, buflen, m_file) <= 0 ||
       fseek(m_file, cur_pos, SEEK_SET)) {
-      m_status = Decoder::file_invalid;
-      return NULL;
+      m_status = NullDecoder::file_invalid;
+      return false;
     }
-    return (const char*)m_symbol;
+    // fread() copies raw bytes and never appends a terminator, so a symbol
+    // of buflen or more characters would leave 'buf' unterminated.
+    buf[buflen - 1] = '\0';
+    return true;
   }
 }
 
--- a/src/share/vm/utilities/elfStringTable.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/elfStringTable.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef __ELF_STRING_TABLE_HPP
-#define __ELF_STRING_TABLE_HPP
+#ifndef SHARE_VM_UTILITIES_ELF_STRING_TABLE_HPP
+#define SHARE_VM_UTILITIES_ELF_STRING_TABLE_HPP
 
 #if !defined(_WINDOWS) && !defined(__APPLE__)
 
@@ -35,9 +35,6 @@
 // The string table represents a string table section in an elf file.
 // Whenever there is enough memory, it will load whole string table as
 // one blob. Otherwise, it will load string from file when requested.
-
-#define MAX_SYMBOL_LEN  256
-
 class ElfStringTable: CHeapObj {
   friend class ElfFile;
  public:
@@ -48,10 +45,10 @@
   int index() { return m_index; };
 
   // get string at specified offset
-  const char* string_at(int offset);
+  bool string_at(int offset, char* buf, int buflen);
 
   // get status code
-  Decoder::decoder_status get_status() { return m_status; };
+  NullDecoder::decoder_status get_status() { return m_status; };
 
  protected:
   ElfStringTable*        m_next;
@@ -69,13 +66,10 @@
   // section header
   Elf_Shdr                 m_shdr;
 
-  // buffer for reading individual string
-  char                     m_symbol[MAX_SYMBOL_LEN];
-
   // error code
-  Decoder::decoder_status  m_status;
+  NullDecoder::decoder_status  m_status;
 };
 
-#endif // _WINDOWS
+#endif // !_WINDOWS && !__APPLE__
 
-#endif // __ELF_STRING_TABLE_HPP
+#endif // SHARE_VM_UTILITIES_ELF_STRING_TABLE_HPP
--- a/src/share/vm/utilities/elfSymbolTable.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/elfSymbolTable.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -34,7 +34,7 @@
   m_symbols = NULL;
   m_next = NULL;
   m_file = file;
-  m_status = Decoder::no_error;
+  m_status = NullDecoder::no_error;
 
   // try to load the string table
   long cur_offset = ftell(file);
@@ -45,16 +45,16 @@
       if (fseek(file, shdr.sh_offset, SEEK_SET) ||
         fread((void*)m_symbols, shdr.sh_size, 1, file) != 1 ||
         fseek(file, cur_offset, SEEK_SET)) {
-        m_status = Decoder::file_invalid;
+        m_status = NullDecoder::file_invalid;
         os::free(m_symbols);
         m_symbols = NULL;
       }
     }
-    if (m_status == Decoder::no_error) {
+    if (!NullDecoder::is_error(m_status)) {
       memcpy(&m_shdr, &shdr, sizeof(Elf_Shdr));
     }
   } else {
-    m_status = Decoder::file_invalid;
+    m_status = NullDecoder::file_invalid;
   }
 }
 
@@ -68,13 +68,13 @@
   }
 }
 
-Decoder::decoder_status ElfSymbolTable::lookup(address addr, int* stringtableIndex, int* posIndex, int* offset) {
+bool ElfSymbolTable::lookup(address addr, int* stringtableIndex, int* posIndex, int* offset) {
   assert(stringtableIndex, "null string table index pointer");
   assert(posIndex, "null string table offset pointer");
   assert(offset, "null offset pointer");
 
-  if (m_status != Decoder::no_error) {
-    return m_status;
+  if (NullDecoder::is_error(m_status)) {
+    return false;
   }
 
   address pc = 0;
@@ -97,8 +97,8 @@
     long cur_pos;
     if ((cur_pos = ftell(m_file)) == -1 ||
       fseek(m_file, m_shdr.sh_offset, SEEK_SET)) {
-      m_status = Decoder::file_invalid;
-      return m_status;
+      m_status = NullDecoder::file_invalid;
+      return false;
     }
 
     Elf_Sym sym;
@@ -114,13 +114,13 @@
           }
         }
       } else {
-        m_status = Decoder::file_invalid;
-        return m_status;
+        m_status = NullDecoder::file_invalid;
+        return false;
       }
     }
     fseek(m_file, cur_pos, SEEK_SET);
   }
-  return m_status;
+  return true;
 }
 
 #endif // _WINDOWS
--- a/src/share/vm/utilities/elfSymbolTable.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/elfSymbolTable.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef __ELF_SYMBOL_TABLE_HPP
-#define __ELF_SYMBOL_TABLE_HPP
+#ifndef SHARE_VM_UTILITIES_ELF_SYMBOL_TABLE_HPP
+#define SHARE_VM_UTILITIES_ELF_SYMBOL_TABLE_HPP
 
 #if !defined(_WINDOWS) && !defined(__APPLE__)
 
@@ -45,9 +45,9 @@
   ~ElfSymbolTable();
 
   // search the symbol that is nearest to the specified address.
-  Decoder::decoder_status lookup(address addr, int* stringtableIndex, int* posIndex, int* offset);
+  bool lookup(address addr, int* stringtableIndex, int* posIndex, int* offset);
 
-  Decoder::decoder_status get_status() { return m_status; };
+  NullDecoder::decoder_status get_status() { return m_status; };
 
  protected:
   ElfSymbolTable*  m_next;
@@ -62,9 +62,9 @@
   // section header
   Elf_Shdr            m_shdr;
 
-  Decoder::decoder_status  m_status;
+  NullDecoder::decoder_status  m_status;
 };
 
-#endif // _WINDOWS
+#endif // !_WINDOWS && !__APPLE__
 
-#endif // __ELF_SYMBOL_TABLE_HPP
+#endif // SHARE_VM_UTILITIES_ELF_SYMBOL_TABLE_HPP
--- a/src/share/vm/utilities/events.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/events.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #include "memory/allocation.inline.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/osThread.hpp"
+#include "runtime/threadCritical.hpp"
 #include "runtime/threadLocalStorage.hpp"
 #include "runtime/timer.hpp"
 #include "utilities/events.hpp"
@@ -43,184 +44,44 @@
 #endif
 
 
-#ifndef PRODUCT
-
-////////////////////////////////////////////////////////////////////////////
-// Event
-
-typedef u4 EventID;
-
-class Event VALUE_OBJ_CLASS_SPEC  {
- private:
-  jlong       _time_tick;
-  intx        _thread_id;
-  const char* _format;
-  int         _indent;
-  intptr_t    _arg_1;
-  intptr_t    _arg_2;
-  intptr_t    _arg_3;
-
-  // only EventBuffer::add_event() can assign event id
-  friend class EventBuffer;
-  EventID     _id;
-
- public:
-
-  void clear() { _format = NULL; }
-
-  EventID id() const { return _id; }
-
-  void fill(int indent, const char* format, intptr_t arg_1, intptr_t arg_2, intptr_t arg_3) {
-    _format = format;
-    _arg_1  = arg_1;
-    _arg_2  = arg_2;
-    _arg_3  = arg_3;
-
-    _indent = indent;
-
-    _thread_id = os::current_thread_id();
-    _time_tick = os::elapsed_counter();
-  }
-
-  void print_on(outputStream *st) {
-    if (_format == NULL) return;
-    st->print("  %d", _thread_id);
-    st->print("  %3.2g   ", (double)_time_tick / os::elapsed_frequency());
-    st->fill_to(20);
-    for (int index = 0; index < _indent; index++) {
-      st->print("| ");
-    }
-    st->print_cr(_format, _arg_1, _arg_2, _arg_3);
-  }
-};
-
-////////////////////////////////////////////////////////////////////////////
-// EventBuffer
-//
-// Simple lock-free event queue. Every event has a unique 32-bit id.
-// It's fine if two threads add events at the same time, because they
-// will get different event id, and then write to different buffer location.
-// However, it is assumed that add_event() is quick enough (or buffer size
-// is big enough), so when one thread is adding event, there can't be more
-// than "size" events created by other threads; otherwise we'll end up having
-// two threads writing to the same location.
-
-class EventBuffer : AllStatic {
- private:
-  static Event* buffer;
-  static int    size;
-  static jint   indent;
-  static volatile EventID _current_event_id;
-
-  static EventID get_next_event_id() {
-    return (EventID)Atomic::add(1, (jint*)&_current_event_id);
-  }
-
- public:
-  static void inc_indent() { Atomic::inc(&indent); }
-  static void dec_indent() { Atomic::dec(&indent); }
+EventLog* Events::_logs = NULL;
+StringEventLog* Events::_messages = NULL;
+StringEventLog* Events::_exceptions = NULL;
+StringEventLog* Events::_deopt_messages = NULL;
 
-  static bool get_event(EventID id, Event* event) {
-    int index = (int)(id % size);
-    if (buffer[index].id() == id) {
-      memcpy(event, &buffer[index], sizeof(Event));
-      // check id again; if buffer[index] is being updated by another thread,
-      // event->id() will contain different value.
-      return (event->id() == id);
-    } else {
-      // id does not match - id is invalid, or event is overwritten
-      return false;
-    }
-  }
-
-  // add a new event to the queue; if EventBuffer is full, this call will
-  // overwrite the oldest event in the queue
-  static EventID add_event(const char* format,
-                           intptr_t arg_1, intptr_t arg_2, intptr_t arg_3) {
-    // assign a unique id
-    EventID id = get_next_event_id();
-
-    // event will be copied to buffer[index]
-    int index = (int)(id % size);
-
-    // first, invalidate id, buffer[index] can't have event with id = index + 2
-    buffer[index]._id = index + 2;
-
-    // make sure everyone has seen that buffer[index] is invalid
-    OrderAccess::fence();
-
-    // ... before updating its value
-    buffer[index].fill(indent, format, arg_1, arg_2, arg_3);
-
-    // finally, set up real event id, now buffer[index] contains valid event
-    OrderAccess::release_store(&(buffer[index]._id), id);
-
-    return id;
-  }
-
-  static void print_last(outputStream *st, int number) {
-    st->print_cr("[Last %d events in the event buffer]", number);
-    st->print_cr("-<thd>-<elapsed sec>-<description>---------------------");
+EventLog::EventLog() {
+  // Push this log onto the front of the Events::_logs list.  This is normally
+  // done during single-threaded bootstrap; the ThreadCritical covers logs
+  // that are created slightly late.
+  ThreadCritical tc;
+  _next = Events::_logs;
+  Events::_logs = this;
+}
 
-    int count = 0;
-    EventID id = _current_event_id;
-    while (count < number) {
-      Event event;
-      if (get_event(id, &event)) {
-         event.print_on(st);
-      }
-      id--;
-      count++;
-    }
-  }
-
-  static void print_all(outputStream* st) {
-    print_last(st, size);
-  }
-
-  static void init() {
-    // Allocate the event buffer
-    size   = EventLogLength;
-    buffer = NEW_C_HEAP_ARRAY(Event, size);
-
-    _current_event_id = 0;
-
-    // Clear the event buffer
-    for (int index = 0; index < size; index++) {
-      buffer[index]._id = index + 1;       // index + 1 is invalid id
-      buffer[index].clear();
-    }
-  }
-};
-
-Event*           EventBuffer::buffer;
-int              EventBuffer::size;
-volatile EventID EventBuffer::_current_event_id;
-int              EventBuffer::indent;
-
-////////////////////////////////////////////////////////////////////////////
-// Events
-
-// Events::log() is safe for signal handlers
-void Events::log(const char* format, ...) {
-  if (LogEvents) {
-    va_list ap;
-    va_start(ap, format);
-    intptr_t arg_1 = va_arg(ap, intptr_t);
-    intptr_t arg_2 = va_arg(ap, intptr_t);
-    intptr_t arg_3 = va_arg(ap, intptr_t);
-    va_end(ap);
-
-    EventBuffer::add_event(format, arg_1, arg_2, arg_3);
+// For each registered event logger, print out the current contents of
+// the buffer.  This is normally called when the JVM is crashing.
+void Events::print_all(outputStream* out) {
+  EventLog* log = _logs;
+  while (log != NULL) {
+    log->print_log_on(out);
+    log = log->next();
   }
 }
 
-void Events::print_all(outputStream *st) {
-  EventBuffer::print_all(st);
+void Events::print() {
+  print_all(tty);
 }
 
-void Events::print_last(outputStream *st, int number) {
-  EventBuffer::print_last(st, number);
+// Creates the three default logs; does nothing unless LogEvents is set.
+void Events::init() {
+  if (!LogEvents) return;
+  _messages = new StringEventLog("Events");
+  _exceptions = new StringEventLog("Internal exceptions");
+  _deopt_messages = new StringEventLog("Deoptimization events");
+}
+
+void eventlog_init() {
+  Events::init();
 }
 
 ///////////////////////////////////////////////////////////////////////////
@@ -230,37 +91,19 @@
   if (LogEvents) {
     va_list ap;
     va_start(ap, format);
-    intptr_t arg_1 = va_arg(ap, intptr_t);
-    intptr_t arg_2 = va_arg(ap, intptr_t);
-    intptr_t arg_3 = va_arg(ap, intptr_t);
+    // Save a copy of begin message and log it.  The saved text goes through
+    // "%s" so that a '%' it contains is not expanded a second time.
+    _buffer.printv(format, ap);
+    Events::log(NULL, "%s", (const char*)_buffer);
     va_end(ap);
-
-    EventBuffer::add_event(format, arg_1, arg_2, arg_3);
-    EventBuffer::inc_indent();
   }
 }
 
 EventMark::~EventMark() {
   if (LogEvents) {
-    EventBuffer::dec_indent();
-    EventBuffer::add_event("done", 0, 0, 0);
+    // Append " done" to the begin message and log it, again as a "%s"
+    // argument rather than as the format string itself.
+    _buffer.append(" done");
+    Events::log(NULL, "%s", (const char*)_buffer);
   }
 }
-
-///////////////////////////////////////////////////////////////////////////
-
-void eventlog_init() {
-  EventBuffer::init();
-}
-
-int print_all_events(outputStream *st) {
-  EventBuffer::print_all(st);
-  return 1;
-}
-
-#else
-
-void eventlog_init() {}
-int print_all_events(outputStream *st) { return 0; }
-
-#endif // PRODUCT
--- a/src/share/vm/utilities/events.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/events.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,47 +26,266 @@
 #define SHARE_VM_UTILITIES_EVENTS_HPP
 
 #include "memory/allocation.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/thread.hpp"
 #include "utilities/top.hpp"
+#include "utilities/vmError.hpp"
 
 // Events and EventMark provide interfaces to log events taking place in the vm.
 // This facility is extremly useful for post-mortem debugging. The eventlog
 // often provides crucial information about events leading up to the crash.
 //
-// All arguments past the format string must be passed as an intptr_t.
-//
-// To log a single event use:
-//    Events::log("New nmethod has been created " INTPTR_FORMAT, nm);
-//
-// To log a block of events use:
-//    EventMark m("GarbageCollecting %d", (intptr_t)gc_number);
-//
-// The constructor to eventlog indents the eventlog until the
-// destructor has been executed.
-//
-// IMPLEMENTATION RESTRICTION:
-//   Max 3 arguments are saved for each logged event.
-//
+// Abstractly the logs can record whatever they want but normally they
+// would record at least a timestamp and the current Thread, along
+// with whatever data they need in a ring buffer.  Commonly fixed
+// length text messages are recorded for simplicity but other
+// strategies could be used.  Several logs are provided by default but
+// new instances can be created as needed.
+
+// The base event log dumping class that is registered for dumping at
+// crash time.  This is a very generic interface that is mainly here
+// for completeness.  Normally the templated EventLogBase would be
+// subclassed to provide different log types.
+class EventLog : public CHeapObj {
+  friend class Events;  // Events::print_all() walks the logs through next()
+ private:
+  // Link to the next log in the list headed by Events::_logs.
+  EventLog* _next;
+  EventLog* next() const { return _next; }
+
+ public:
+  // Automatically registers the log so that it will be printed during
+  // crashes.
+  EventLog();
+
+  // Prints the contents of this log to 'out'; supplied by subclasses.
+  virtual void print_log_on(outputStream* out) = 0;
+};
+
+
+// A templated subclass of EventLog that provides basic ring buffer
+// functionality.  Most event loggers should subclass this, possibly
+// providing a more featureful log function if the existing copy
+// semantics aren't appropriate.  The name is used as the label of the
+// log when it is dumped during a crash.
+template <class T> class EventLogBase : public EventLog {
+  // One ring buffer slot: the time of the event, its thread and the payload.
+  template <class X> class EventRecord {
+   public:
+    double  timestamp;
+    Thread* thread;
+    X       data;
+  };
+
+ protected:
+  Mutex           _mutex;    // held while the ring buffer is modified
+  const char*     _name;     // label printed at the top of the log
+  int             _length;   // number of slots in _records
+  int             _index;    // slot that the next entry is written to
+  int             _count;    // number of used slots, never more than _length
+  EventRecord<T>* _records;
+
+ public:
+  // Initializers are listed in member declaration order, the order they run in.
+  EventLogBase<T>(const char* name, int length = LogEventsBufferEntries):
+    _mutex(Mutex::event, name),
+    _name(name),
+    _length(length),
+    _index(0),
+    _count(0) {
+    _records = new EventRecord<T>[length];
+  }
+
+  double fetch_timestamp() { return os::elapsedTime(); }
+
+  // move the ring buffer to next open slot and return the index of
+  // the slot to use for the current message.  Should only be called
+  // while mutex is held.
+  int compute_log_index() {
+    int index = _index;
+    if (_count < _length) _count++;
+    _index++;
+    if (_index >= _length) _index = 0;
+    return index;
+  }
+
+  bool should_log() {
+    // Don't bother adding new entries when we're crashing.  This also
+    // avoids mutating the ring buffer when printing the log.
+    return !VMError::fatal_error_in_progress();
+  }
+
+  // Print the contents of the log
+  void print_log_on(outputStream* out);
+
+ private:
+  void print_log_impl(outputStream* out);
+
+  // Print a single element.  A templated implementation might need to
+  // be declared by subclasses.
+  void print(outputStream* out, T& e);
+
+  void print(outputStream* out, EventRecord<T>& e) {
+    out->print("Event: %.3f ", e.timestamp);
+    if (e.thread != NULL) {
+      out->print("Thread " INTPTR_FORMAT " ", e.thread);
+    }
+    print(out, e.data);
+  }
+};
+
+// A simple wrapper class for fixed size text messages (a FormatBuffer<132>).
+class StringLogMessage : public FormatBuffer<132> {
+ public:
+  // Wrap this buffer in a stringStream built over _buf and sizeof(_buf).
+  stringStream stream() {
+    return stringStream(_buf, sizeof(_buf));
+  }
+};
+
+// A simple ring buffer of fixed size text messages.
+class StringEventLog : public EventLogBase<StringLogMessage> {
+ public:
+  StringEventLog(const char* name, int count = LogEventsBufferEntries) : EventLogBase<StringLogMessage>(name, count) {}
+  // Stores the formatted message, 'thread' and a timestamp in the next slot.
+  void logv(Thread* thread, const char* format, va_list ap) {
+    if (!should_log()) return;
+    // The timestamp is fetched before _mutex is acquired.
+    double timestamp = fetch_timestamp();
+    MutexLockerEx ml(&_mutex, Mutex::_no_safepoint_check_flag);
+    int index = compute_log_index();
+    _records[index].thread = thread;
+    _records[index].timestamp = timestamp;
+    _records[index].data.printv(format, ap);
+  }
+
+  // printf-style front end for logv().
+  void log(Thread* thread, const char* format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    logv(thread, format, ap);
+    va_end(ap);
+  }
+};
+
+
 
 class Events : AllStatic {
- public:
-  // Logs an event, format as printf
-  static void log(const char* format, ...) PRODUCT_RETURN;
+  friend class EventLog;
+
+ private:
+  static EventLog* _logs;
+
+  // A log for generic messages that aren't well categorized.
+  static StringEventLog* _messages;
+
+  // A log for internal exception related messages, like internal
+  // throws and implicit exceptions.
+  static StringEventLog* _exceptions;
+
+  // Deoptimization related messages
+  static StringEventLog* _deopt_messages;
 
-  // Prints all events in the buffer
-  static void print_all(outputStream* st) PRODUCT_RETURN;
+ public:
+  static void print_all(outputStream* out);
+
+  // Dump all events to the tty
+  static void print();
 
-  // Prints last number events from the event buffer
-  static void print_last(outputStream *st, int number) PRODUCT_RETURN;
+  // Logs a generic message with timestamp and format as printf.
+  static void log(Thread* thread, const char* format, ...);
+
+  // Log exception related message
+  static void log_exception(Thread* thread, const char* format, ...);
+
+  static void log_deopt_message(Thread* thread, const char* format, ...);
+
+  // Register default loggers
+  static void init();
 };
 
+
+// Logs to _messages; a no-op if LogEvents is off or Events::init() has not run.
+inline void Events::log(Thread* thread, const char* format, ...) {
+  if (!LogEvents || _messages == NULL) return;
+  va_list ap;
+  va_start(ap, format);
+  _messages->logv(thread, format, ap);
+  va_end(ap);
+}
+
+// Logs to _exceptions; a no-op if LogEvents is off or Events::init() has not run.
+inline void Events::log_exception(Thread* thread, const char* format, ...) {
+  if (!LogEvents || _exceptions == NULL) return;
+  va_list ap;
+  va_start(ap, format);
+  _exceptions->logv(thread, format, ap);
+  va_end(ap);
+}
+
+// Logs to _deopt_messages; a no-op if LogEvents is off or Events::init() has not run.
+inline void Events::log_deopt_message(Thread* thread, const char* format, ...) {
+  if (!LogEvents || _deopt_messages == NULL) return;
+  va_list ap;
+  va_start(ap, format);
+  _deopt_messages->logv(thread, format, ap);
+  va_end(ap);
+}
+
+
+// Prints the log, holding _mutex unless get_thread_slow() finds no Thread.
+template <class T>
+inline void EventLogBase<T>::print_log_on(outputStream* out) {
+  if (ThreadLocalStorage::get_thread_slow() != NULL) {
+    MutexLockerEx ml(&_mutex, Mutex::_no_safepoint_check_flag);
+    print_log_impl(out);
+    return;
+  }
+  print_log_impl(out);
+}
+
+// Prints a "<name> (<count> events):" header, then either "No events" or
+// every recorded entry in the order it was logged, then an empty line.
+template <class T>
+inline void EventLogBase<T>::print_log_impl(outputStream* out) {
+  out->print_cr("%s (%d events):", _name, _count);
+  if (_count == 0) {
+    out->print_cr("No events");
+    out->cr();
+    return;
+  }
+
+  // Until the ring buffer has wrapped (_count < _length) the oldest entry
+  // is in slot 0.  Once it is full, _index is the slot that gets overwritten
+  // next, so that is where the oldest entry lives.
+  const int oldest = (_count < _length) ? 0 : _index;
+  for (int n = 0; n < _count; n++) {
+    int slot = oldest + n;
+    if (slot >= _length) {
+      slot -= _length;  // continue from the start of the array
+    }
+    print(out, _records[slot]);
+  }
+  out->cr();
+}
+
+// Printing routine for StringLogMessage: the raw text followed by cr().
+template <>
+inline void EventLogBase<StringLogMessage>::print(outputStream* out, StringLogMessage& lm) {
+  out->print_raw(lm);
+  out->cr();
+}
+
+// Place markers for the beginning and end of a set of events.
+// These end up in the default log.
 class EventMark : public StackObj {
+  StringLogMessage _buffer;
+
  public:
   // log a begin event, format as printf
-  EventMark(const char* format, ...) PRODUCT_RETURN;
+  EventMark(const char* format, ...);
   // log an end event
-  ~EventMark() PRODUCT_RETURN;
+  ~EventMark();
 };
 
-int print_all_events(outputStream *st);
-
 #endif // SHARE_VM_UTILITIES_EVENTS_HPP
--- a/src/share/vm/utilities/exceptions.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/exceptions.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -95,8 +95,7 @@
 #endif // ASSERT
 
   if (thread->is_VM_thread()
-	  // TODO(tw): May we do this?
-      /*|| thread->is_Compiler_thread()*/ ) {
+      || thread->is_Compiler_thread() ) {
     // We do not care what kind of exception we get for the vm-thread or a thread which
     // is compiling.  We just install a dummy exception object
     thread->set_pending_exception(Universe::vm_exception(), file, line);
@@ -119,8 +118,7 @@
   }
 
   if (thread->is_VM_thread()
-	  // TODO(tw): May we do this?
-     /* || thread->is_Compiler_thread()*/ ) {
+      || thread->is_Compiler_thread() ) {
     // We do not care what kind of exception we get for the vm-thread or a thread which
     // is compiling.  We just install a dummy exception object
     thread->set_pending_exception(Universe::vm_exception(), file, line);
@@ -162,7 +160,7 @@
   thread->set_pending_exception(h_exception(), file, line);
 
   // vm log
-  Events::log("throw_exception " INTPTR_FORMAT, (address)h_exception());
+  Events::log_exception(thread, "Threw " INTPTR_FORMAT " at %s:%d", (address)h_exception(), file, line);
 }
 
 
--- a/src/share/vm/utilities/exceptions.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/exceptions.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -189,6 +189,13 @@
 #define CHECK_NULL                               CHECK_(NULL)
 #define CHECK_false                              CHECK_(false)
 
+#define CHECK_AND_CLEAR                         THREAD); if (HAS_PENDING_EXCEPTION) { CLEAR_PENDING_EXCEPTION; return;        } (0
+#define CHECK_AND_CLEAR_(result)                THREAD); if (HAS_PENDING_EXCEPTION) { CLEAR_PENDING_EXCEPTION; return result; } (0
+#define CHECK_AND_CLEAR_0                       CHECK_AND_CLEAR_(0)
+#define CHECK_AND_CLEAR_NH                      CHECK_AND_CLEAR_(Handle())
+#define CHECK_AND_CLEAR_NULL                    CHECK_AND_CLEAR_(NULL)
+#define CHECK_AND_CLEAR_false                   CHECK_AND_CLEAR_(false)
+
 // The THROW... macros should be used to throw an exception. They require a THREAD variable to be
 // visible within the scope containing the THROW. Usually this is achieved by declaring the function
 // with a TRAPS argument.
@@ -258,7 +265,6 @@
     ShouldNotReachHere();                  \
   } (0
 
-
 // ExceptionMark is a stack-allocated helper class for local exception handling.
 // It is used with the EXCEPTION_MARK macro.
 
--- a/src/share/vm/utilities/globalDefinitions.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -175,6 +175,9 @@
 const int MICROUNITS    = 1000000;      // micro units per base unit
 const int NANOUNITS     = 1000000000;   // nano units per base unit
 
+const jlong NANOSECS_PER_SEC      = CONST64(1000000000);
+const jint  NANOSECS_PER_MILLISEC = 1000000;
+
 inline const char* proper_unit_for_byte_size(size_t s) {
   if (s >= 10*M) {
     return "M";
@@ -295,6 +298,11 @@
 const juint   max_juint   = (juint)-1;   // 0xFFFFFFFF largest juint
 const julong  max_julong  = (julong)-1;  // 0xFF....FF largest julong
 
+typedef jbyte  s1;
+typedef jshort s2;
+typedef jint   s4;
+typedef jlong  s8;
+
 //----------------------------------------------------------------------------------------------------
 // JVM spec restrictions
 
--- a/src/share/vm/utilities/globalDefinitions_gcc.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/globalDefinitions_gcc.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -87,14 +87,7 @@
 #endif
 #ifdef __APPLE__
   #include <AvailabilityMacros.h>
-  #if (MAC_OS_X_VERSION_MAX_ALLOWED <= MAC_OS_X_VERSION_10_4)
-    // Mac OS X 10.4 defines EFL_AC and EFL_ID,
-    // which conflict with hotspot variable names.
-    //
-    // This has been fixed in Mac OS X 10.5.
-    #undef EFL_AC
-    #undef EFL_ID
-  #endif
+  #include <mach/mach.h>
 #endif
 #include <sys/time.h>
 #endif // LINUX || _ALLBSD_SOURCE
--- a/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -130,6 +130,9 @@
 //----------------------------------------------------------------------------------------------------
 // Non-standard stdlib-like stuff:
 inline int strcasecmp(const char *s1, const char *s2) { return _stricmp(s1,s2); }
+inline int strncasecmp(const char *s1, const char *s2, size_t n) {
+  return _strnicmp(s1,s2,n);
+}
 
 
 //----------------------------------------------------------------------------------------------------
--- a/src/share/vm/utilities/hashtable.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/hashtable.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -183,7 +183,6 @@
 
   // Accessor
   int entry_size() const { return _entry_size; }
-  int table_size() { return _table_size; }
 
   // The following method is MT-safe and may be used with caution.
   BasicHashtableEntry* bucket(int i);
@@ -195,6 +194,7 @@
   BasicHashtableEntry* new_entry(unsigned int hashValue);
 
 public:
+  int table_size() { return _table_size; }
   void set_entry(int index, BasicHashtableEntry* entry);
 
   void add_entry(int index, BasicHashtableEntry* entry);
--- a/src/share/vm/utilities/ostream.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/ostream.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1021,7 +1021,7 @@
 
 void networkStream::flush() {
   if (size() != 0) {
-    int result = os::raw_send(_socket, (char *)base(), (int)size(), 0);
+    int result = os::raw_send(_socket, (char *)base(), size(), 0);
     assert(result != -1, "connection error");
     assert(result == (int)size(), "didn't send enough data");
   }
--- a/src/share/vm/utilities/preserveException.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/preserveException.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -32,9 +32,9 @@
   thread     = Thread::current();
   _thread    = thread;
   _preserved_exception_oop = Handle(thread, _thread->pending_exception());
-  _thread->clear_pending_exception(); // Needed to avoid infinite recursion
   _preserved_exception_line = _thread->exception_line();
   _preserved_exception_file = _thread->exception_file();
+  _thread->clear_pending_exception(); // Needed to avoid infinite recursion
 }
 
 
--- a/src/share/vm/utilities/quickSort.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/quickSort.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -93,8 +93,7 @@
   return compare_arrays(arrayToSort, expectedResult, length);
 }
 
-bool QuickSort::test_quick_sort() {
-  tty->print_cr("test_quick_sort");
+void QuickSort::test_quick_sort() {
   {
     int* test_array = NULL;
     int* expected_array = NULL;
@@ -214,7 +213,6 @@
     delete[] test_array;
     delete[] expected_array;
   }
-  return true;
 }
 
 #endif
--- a/src/share/vm/utilities/quickSort.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/quickSort.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -130,7 +130,7 @@
   static void print_array(const char* prefix, int* array, int length);
   static bool compare_arrays(int* actual, int* expected, int length);
   template <class C> static bool sort_and_compare(int* arrayToSort, int* expectedResult, int length, C comparator, bool idempotent = false);
-  static bool test_quick_sort();
+  static void test_quick_sort();
 #endif
 };
 
--- a/src/share/vm/utilities/vmError.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/vmError.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,7 @@
 #include "utilities/decoder.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/errorReporter.hpp"
+#include "utilities/events.hpp"
 #include "utilities/top.hpp"
 #include "utilities/vmError.hpp"
 
@@ -571,8 +572,6 @@
        if (fr.pc()) {
           st->print_cr("Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)");
 
-          // initialize decoder to decode C frames
-          Decoder decoder;
 
           int count = 0;
           while (count++ < StackPrintLimit) {
@@ -695,7 +694,14 @@
        st->cr();
      }
 
-  STEP(200, "(printing dynamic libraries)" )
+  STEP(200, "(printing ring buffers)" )
+
+     if (_verbose) {
+       Events::print_all(st);
+       st->cr();
+     }
+
+  STEP(205, "(printing dynamic libraries)" )
 
      if (_verbose) {
        // dynamic libraries, or memory map
--- a/src/share/vm/utilities/vmError.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/vmError.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -27,11 +27,12 @@
 
 #include "utilities/globalDefinitions.hpp"
 
-
+class Decoder;
 class VM_ReportJavaOutOfMemory;
 
 class VMError : public StackObj {
   friend class VM_ReportJavaOutOfMemory;
+  friend class Decoder;
 
   enum ErrorType {
     internal_error = 0xe0000000,
--- a/src/share/vm/utilities/workgroup.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/workgroup.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -53,14 +53,14 @@
 }
 
 WorkGang::WorkGang(const char* name,
-                   int         workers,
+                   uint        workers,
                    bool        are_GC_task_threads,
                    bool        are_ConcurrentGC_threads) :
   AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) {
   _total_workers = workers;
 }
 
-GangWorker* WorkGang::allocate_worker(int which) {
+GangWorker* WorkGang::allocate_worker(uint which) {
   GangWorker* new_worker = new GangWorker(this, which);
   return new_worker;
 }
@@ -88,7 +88,7 @@
   } else {
     worker_type = os::pgc_thread;
   }
-  for (int worker = 0; worker < total_workers(); worker += 1) {
+  for (uint worker = 0; worker < total_workers(); worker += 1) {
     GangWorker* new_worker = allocate_worker(worker);
     assert(new_worker != NULL, "Failed to allocate GangWorker");
     _gang_workers[worker] = new_worker;
@@ -108,14 +108,14 @@
     tty->print_cr("Destructing work gang %s", name());
   }
   stop();   // stop all the workers
-  for (int worker = 0; worker < total_workers(); worker += 1) {
+  for (uint worker = 0; worker < total_workers(); worker += 1) {
     delete gang_worker(worker);
   }
   delete gang_workers();
   delete monitor();
 }
 
-GangWorker* AbstractWorkGang::gang_worker(int i) const {
+GangWorker* AbstractWorkGang::gang_worker(uint i) const {
   // Array index bounds checking.
   GangWorker* result = NULL;
   assert(gang_workers() != NULL, "No workers for indexing");
@@ -148,7 +148,7 @@
   // Tell the workers to get to work.
   monitor()->notify_all();
   // Wait for them to be finished
-  while (finished_workers() < (int) no_of_parallel_workers) {
+  while (finished_workers() < no_of_parallel_workers) {
     if (TraceWorkGang) {
       tty->print_cr("Waiting in work gang %s: %d/%d finished sequence %d",
                     name(), finished_workers(), no_of_parallel_workers,
@@ -377,12 +377,12 @@
     _n_workers(0), _n_completed(0), _should_reset(false) {
 }
 
-WorkGangBarrierSync::WorkGangBarrierSync(int n_workers, const char* name)
+WorkGangBarrierSync::WorkGangBarrierSync(uint n_workers, const char* name)
   : _monitor(Mutex::safepoint, name, true),
     _n_workers(n_workers), _n_completed(0), _should_reset(false) {
 }
 
-void WorkGangBarrierSync::set_n_workers(int n_workers) {
+void WorkGangBarrierSync::set_n_workers(uint n_workers) {
   _n_workers   = n_workers;
   _n_completed = 0;
   _should_reset = false;
@@ -419,9 +419,9 @@
 
 // SubTasksDone functions.
 
-SubTasksDone::SubTasksDone(int n) :
+SubTasksDone::SubTasksDone(uint n) :
   _n_tasks(n), _n_threads(1), _tasks(NULL) {
-  _tasks = NEW_C_HEAP_ARRAY(jint, n);
+  _tasks = NEW_C_HEAP_ARRAY(uint, n);
   guarantee(_tasks != NULL, "alloc failure");
   clear();
 }
@@ -430,14 +430,14 @@
   return _tasks != NULL;
 }
 
-void SubTasksDone::set_n_threads(int t) {
+void SubTasksDone::set_n_threads(uint t) {
   assert(_claimed == 0 || _threads_completed == _n_threads,
          "should not be called while tasks are being processed!");
   _n_threads = (t == 0 ? 1 : t);
 }
 
 void SubTasksDone::clear() {
-  for (int i = 0; i < _n_tasks; i++) {
+  for (uint i = 0; i < _n_tasks; i++) {
     _tasks[i] = 0;
   }
   _threads_completed = 0;
@@ -446,9 +446,9 @@
 #endif
 }
 
-bool SubTasksDone::is_task_claimed(int t) {
+bool SubTasksDone::is_task_claimed(uint t) {
   assert(0 <= t && t < _n_tasks, "bad task id.");
-  jint old = _tasks[t];
+  uint old = _tasks[t];
   if (old == 0) {
     old = Atomic::cmpxchg(1, &_tasks[t], 0);
   }
@@ -457,7 +457,7 @@
 #ifdef ASSERT
   if (!res) {
     assert(_claimed < _n_tasks, "Too many tasks claimed; missing clear?");
-    Atomic::inc(&_claimed);
+    Atomic::inc((volatile jint*) &_claimed);
   }
 #endif
   return res;
@@ -471,7 +471,7 @@
     observed = Atomic::cmpxchg(old+1, &_threads_completed, old);
   } while (observed != old);
   // If this was the last thread checking in, clear the tasks.
-  if (observed+1 == _n_threads) clear();
+  if (observed+1 == (jint)_n_threads) clear();
 }
 
 
@@ -490,12 +490,12 @@
   return _n_threads > 0;
 }
 
-bool SequentialSubTasksDone::is_task_claimed(int& t) {
-  jint* n_claimed_ptr = &_n_claimed;
+bool SequentialSubTasksDone::is_task_claimed(uint& t) {
+  uint* n_claimed_ptr = &_n_claimed;
   t = *n_claimed_ptr;
   while (t < _n_tasks) {
     jint res = Atomic::cmpxchg(t+1, n_claimed_ptr, t);
-    if (res == t) {
+    if (res == (jint)t) {
       return false;
     }
     t = *n_claimed_ptr;
@@ -504,10 +504,10 @@
 }
 
 bool SequentialSubTasksDone::all_tasks_completed() {
-  jint* n_completed_ptr = &_n_completed;
-  jint  complete        = *n_completed_ptr;
+  uint* n_completed_ptr = &_n_completed;
+  uint  complete        = *n_completed_ptr;
   while (true) {
-    jint res = Atomic::cmpxchg(complete+1, n_completed_ptr, complete);
+    uint res = Atomic::cmpxchg(complete+1, n_completed_ptr, complete);
     if (res == complete) {
       break;
     }
--- a/src/share/vm/utilities/workgroup.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/workgroup.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -68,7 +68,7 @@
 public:
   // The abstract work method.
   // The argument tells you which member of the gang you are.
-  virtual void work(int i) = 0;
+  virtual void work(uint worker_id) = 0;
 
   // This method configures the task for proper termination.
   // Some tasks do not have any requirements on termination
@@ -149,7 +149,7 @@
   // and notifies of changes in it.
   Monitor*  _monitor;
   // The count of the number of workers in the gang.
-  int _total_workers;
+  uint _total_workers;
   // Whether the workers should terminate.
   bool _terminate;
   // The array of worker threads for this gang.
@@ -160,18 +160,18 @@
   // A sequence number for the current task.
   int _sequence_number;
   // The number of started workers.
-  int _started_workers;
+  uint _started_workers;
   // The number of finished workers.
-  int _finished_workers;
+  uint _finished_workers;
 public:
   // Accessors for fields
   Monitor* monitor() const {
     return _monitor;
   }
-  int total_workers() const {
+  uint total_workers() const {
     return _total_workers;
   }
-  virtual int active_workers() const {
+  virtual uint active_workers() const {
     return _total_workers;
   }
   bool terminate() const {
@@ -186,10 +186,10 @@
   int sequence_number() const {
     return _sequence_number;
   }
-  int started_workers() const {
+  uint started_workers() const {
     return _started_workers;
   }
-  int finished_workers() const {
+  uint finished_workers() const {
     return _finished_workers;
   }
   bool are_GC_task_threads() const {
@@ -203,7 +203,7 @@
     return (task() == NULL);
   }
   // Return the Ith gang worker.
-  GangWorker* gang_worker(int i) const;
+  GangWorker* gang_worker(uint i) const;
 
   void threads_do(ThreadClosure* tc) const;
 
@@ -255,13 +255,13 @@
 class WorkGang: public AbstractWorkGang {
 public:
   // Constructor
-  WorkGang(const char* name, int workers,
+  WorkGang(const char* name, uint workers,
            bool are_GC_task_threads, bool are_ConcurrentGC_threads);
   // Run a task, returns when the task is done (or terminated).
   virtual void run_task(AbstractGangTask* task);
   void run_task(AbstractGangTask* task, uint no_of_parallel_workers);
   // Allocate a worker and return a pointer to it.
-  virtual GangWorker* allocate_worker(int which);
+  virtual GangWorker* allocate_worker(uint which);
   // Initialize workers in the gang.  Return true if initialization
   // succeeded. The type of the worker can be overridden in a derived
   // class with the appropriate implementation of allocate_worker().
@@ -323,25 +323,25 @@
   // determine completion.
 
  protected:
-  int _active_workers;
+  uint _active_workers;
  public:
   // Constructor and destructor.
   // Initialize active_workers to a minimum value.  Setting it to
   // the parameter "workers" will initialize it to a maximum
   // value which is not desirable.
-  FlexibleWorkGang(const char* name, int workers,
+  FlexibleWorkGang(const char* name, uint workers,
                    bool are_GC_task_threads,
                    bool  are_ConcurrentGC_threads) :
     WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads),
-    _active_workers(UseDynamicNumberOfGCThreads ? 1 : ParallelGCThreads) {};
+    _active_workers(UseDynamicNumberOfGCThreads ? 1U : ParallelGCThreads) {}
   // Accessors for fields
-  virtual int active_workers() const { return _active_workers; }
-  void set_active_workers(int v) {
+  virtual uint active_workers() const { return _active_workers; }
+  void set_active_workers(uint v) {
     assert(v <= _total_workers,
            "Trying to set more workers active than there are");
     _active_workers = MIN2(v, _total_workers);
     assert(v != 0, "Trying to set active workers to 0");
-    _active_workers = MAX2(1, _active_workers);
+    _active_workers = MAX2(1U, _active_workers);
     assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers,
            "Unless dynamic should use total workers");
   }
@@ -370,13 +370,13 @@
 class WorkGangBarrierSync : public StackObj {
 protected:
   Monitor _monitor;
-  int     _n_workers;
-  int     _n_completed;
+  uint     _n_workers;
+  uint     _n_completed;
   bool    _should_reset;
 
   Monitor* monitor()        { return &_monitor; }
-  int      n_workers()      { return _n_workers; }
-  int      n_completed()    { return _n_completed; }
+  uint     n_workers()      { return _n_workers; }
+  uint     n_completed()    { return _n_completed; }
   bool     should_reset()   { return _should_reset; }
 
   void     zero_completed() { _n_completed = 0; }
@@ -386,11 +386,11 @@
 
 public:
   WorkGangBarrierSync();
-  WorkGangBarrierSync(int n_workers, const char* name);
+  WorkGangBarrierSync(uint n_workers, const char* name);
 
   // Set the number of workers that will use the barrier.
   // Must be called before any of the workers start running.
-  void set_n_workers(int n_workers);
+  void set_n_workers(uint n_workers);
 
   // Enter the barrier. A worker that enters the barrier will
   // not be allowed to leave until all other threads have
@@ -402,18 +402,18 @@
 // subtasks will be identified by integer indices, usually elements of an
 // enumeration type.
 
-class SubTasksDone: public CHeapObj {
-  jint* _tasks;
-  int _n_tasks;
+class SubTasksDone : public CHeapObj {
+  uint* _tasks;
+  uint _n_tasks;
   // _n_threads is used to determine when a sub task is done.
   // It does not control how many threads will execute the subtask
   // but must be initialized to the number that do execute the task
   // in order to correctly decide when the subtask is done (all the
   // threads working on the task have finished).
-  int _n_threads;
-  jint _threads_completed;
+  uint _n_threads;
+  uint _threads_completed;
 #ifdef ASSERT
-  volatile jint _claimed;
+  volatile uint _claimed;
 #endif
 
   // Set all tasks to unclaimed.
@@ -423,19 +423,19 @@
   // Initializes "this" to a state in which there are "n" tasks to be
   // processed, none of the which are originally claimed.  The number of
   // threads doing the tasks is initialized 1.
-  SubTasksDone(int n);
+  SubTasksDone(uint n);
 
   // True iff the object is in a valid state.
   bool valid();
 
   // Get/set the number of parallel threads doing the tasks to "t".  Can only
   // be called before tasks start or after they are complete.
-  int n_threads() { return _n_threads; }
-  void set_n_threads(int t);
+  uint n_threads() { return _n_threads; }
+  void set_n_threads(uint t);
 
   // Returns "false" if the task "t" is unclaimed, and ensures that task is
   // claimed.  The task "t" is required to be within the range of "this".
-  bool is_task_claimed(int t);
+  bool is_task_claimed(uint t);
 
   // The calling thread asserts that it has attempted to claim all the
   // tasks that it will try to claim.  Every thread in the parallel task
@@ -456,12 +456,12 @@
 
 class SequentialSubTasksDone : public StackObj {
 protected:
-  jint _n_tasks;     // Total number of tasks available.
-  jint _n_claimed;   // Number of tasks claimed.
+  uint _n_tasks;     // Total number of tasks available.
+  uint _n_claimed;   // Number of tasks claimed.
   // _n_threads is used to determine when a sub task is done.
   // See comments on SubTasksDone::_n_threads
-  jint _n_threads;   // Total number of parallel threads.
-  jint _n_completed; // Number of completed threads.
+  uint _n_threads;   // Total number of parallel threads.
+  uint _n_completed; // Number of completed threads.
 
   void clear();
 
@@ -475,26 +475,26 @@
   bool valid();
 
   // number of tasks
-  jint n_tasks() const { return _n_tasks; }
+  uint n_tasks() const { return _n_tasks; }
 
   // Get/set the number of parallel threads doing the tasks to t.
   // Should be called before the task starts but it is safe
   // to call this once a task is running provided that all
   // threads agree on the number of threads.
-  int n_threads() { return _n_threads; }
-  void set_n_threads(int t) { _n_threads = t; }
+  uint n_threads() { return _n_threads; }
+  void set_n_threads(uint t) { _n_threads = t; }
 
   // Set the number of tasks to be claimed to t. As above,
   // should be called before the tasks start but it is safe
   // to call this once a task is running provided all threads
   // agree on the number of tasks.
-  void set_n_tasks(int t) { _n_tasks = t; }
+  void set_n_tasks(uint t) { _n_tasks = t; }
 
   // Returns false if the next task in the sequence is unclaimed,
   // and ensures that it is claimed. Will set t to be the index
   // of the claimed task in the sequence. Will return true if
   // the task cannot be claimed and there are none left to claim.
-  bool is_task_claimed(int& t);
+  bool is_task_claimed(uint& t);
 
   // The calling thread asserts that it has attempted to claim
   // all the tasks it possibly can in the sequence. Every thread
--- a/src/share/vm/utilities/xmlstream.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/xmlstream.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -192,8 +192,11 @@
     _element_close_stack_ptr = cur_tag + strlen(cur_tag) + 1;
     _element_depth -= 1;
   }
-  if (bad_tag && !VMThread::should_terminate() && !is_error_reported())
+  if (bad_tag && !VMThread::should_terminate() && !VM_Exit::vm_exited() &&
+      !is_error_reported())
+  {
     assert(false, "bad tag in log");
+  }
 }
 #endif
 
--- a/src/share/vm/utilities/yieldingWorkgroup.cpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/yieldingWorkgroup.cpp	Mon Feb 27 15:06:36 2012 -0800
@@ -33,11 +33,11 @@
 class WorkData;
 
 YieldingFlexibleWorkGang::YieldingFlexibleWorkGang(
-  const char* name, int workers, bool are_GC_task_threads) :
+  const char* name, uint workers, bool are_GC_task_threads) :
   FlexibleWorkGang(name, workers, are_GC_task_threads, false),
     _yielded_workers(0) {}
 
-GangWorker* YieldingFlexibleWorkGang::allocate_worker(int which) {
+GangWorker* YieldingFlexibleWorkGang::allocate_worker(uint which) {
   YieldingFlexibleGangWorker* new_member =
       new YieldingFlexibleGangWorker(this, which);
   return (YieldingFlexibleGangWorker*) new_member;
@@ -120,7 +120,7 @@
   new_task->set_gang(this);  // Establish 2-way binding to support yielding
   _sequence_number++;
 
-  int requested_size = new_task->requested_size();
+  uint requested_size = new_task->requested_size();
   assert(requested_size >= 0, "Should be non-negative");
   if (requested_size != 0) {
     _active_workers = MIN2(requested_size, total_workers());
--- a/src/share/vm/utilities/yieldingWorkgroup.hpp	Mon Feb 27 14:50:58 2012 -0800
+++ b/src/share/vm/utilities/yieldingWorkgroup.hpp	Mon Feb 27 15:06:36 2012 -0800
@@ -71,7 +71,7 @@
 
   // The abstract work method.
   // The argument tells you which member of the gang you are.
-  virtual void work(int i) = 0;
+  virtual void work(uint worker_id) = 0;
 
   int requested_size() const { return _requested_size; }
   int actual_size()    const { return _actual_size; }
@@ -128,7 +128,7 @@
 public:
   // The abstract work method.
   // The argument tells you which member of the gang you are.
-  virtual void work(int i) = 0;
+  virtual void work(uint worker_id) = 0;
 
   // Subclasses should call the parent's yield() method
   // after having done any work specific to the subclass.
@@ -159,7 +159,7 @@
   // Here's the public interface to this class.
 public:
   // Constructor and destructor.
-  YieldingFlexibleWorkGang(const char* name, int workers,
+  YieldingFlexibleWorkGang(const char* name, uint workers,
                            bool are_GC_task_threads);
 
   YieldingFlexibleGangTask* yielding_task() const {
@@ -168,7 +168,7 @@
     return (YieldingFlexibleGangTask*)task();
   }
   // Allocate a worker and return a pointer to it.
-  GangWorker* allocate_worker(int which);
+  GangWorker* allocate_worker(uint which);
 
   // Run a task; returns when the task is done, or the workers yield,
   // or the task is aborted, or the work gang is terminated via stop().
@@ -199,12 +199,12 @@
   void abort();
 
 private:
-  int _yielded_workers;
+  uint _yielded_workers;
   void wait_for_gang();
 
 public:
   // Accessors for fields
-  int yielded_workers() const {
+  uint yielded_workers() const {
     return _yielded_workers;
   }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7090976/Test7090976.java	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7090976
+ * @summary Eclipse/CDT causes a JVM crash while indexing C++ code
+ *
+ * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement Test7090976
+ */
+
+public class Test7090976 {
+
+    static interface I1 {
+        public void m1();
+    };
+
+    static interface I2 {
+        public void m2();
+    };
+
+    static interface I extends I1,I2 {
+    }
+
+    static class A implements I1 {
+        int v = 0;
+        int v2;
+
+        public void m1() {
+            v2 = v;
+        }
+    }
+
+    static class B implements I2 {
+        Object v = new Object();
+        Object v2;
+
+        public void m2() {
+            v2 = v;
+        }
+    }
+
+    private void test(A a)
+    {
+        if (a instanceof I) {
+            I i = (I)a;
+            i.m1();
+            i.m2();
+        }
+    }
+
+    public static void main(String[] args)
+    {
+        Test7090976 t = new Test7090976();
+        A a = new A();
+        B b = new B();
+        for (int i = 0; i < 10000; i++) {
+            t.test(a);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7116216/LargeFrame.java	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,1329 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+public class LargeFrame {
+
+    public static void method_with_many_locals(Object r1, int r2, int r3, int r4, int r5, int r6, int r7, Object r8) {
+        int i00 = 0, i01 = 0, i02 = 0, i03 = 0, i04 = 0, i05 = 0, i06 = 0, i07 = 0, i08 = 0, i09 = 0;
+        int i10 = 0, i11 = 0, i12 = 0, i13 = 0, i14 = 0, i15 = 0, i16 = 0, i17 = 0, i18 = 0, i19 = 0;
+        int i20 = 0, i21 = 0, i22 = 0, i23 = 0, i24 = 0, i25 = 0, i26 = 0, i27 = 0, i28 = 0, i29 = 0;
+        int i30 = 0, i31 = 0, i32 = 0, i33 = 0, i34 = 0, i35 = 0, i36 = 0, i37 = 0, i38 = 0, i39 = 0;
+        int i40 = 0, i41 = 0, i42 = 0, i43 = 0, i44 = 0, i45 = 0, i46 = 0, i47 = 0, i48 = 0, i49 = 0;
+        int i50 = 0, i51 = 0, i52 = 0, i53 = 0, i54 = 0, i55 = 0, i56 = 0, i57 = 0, i58 = 0, i59 = 0;
+        int i60 = 0, i61 = 0, i62 = 0, i63 = 0, i64 = 0, i65 = 0, i66 = 0, i67 = 0, i68 = 0, i69 = 0;
+        int i70 = 0, i71 = 0, i72 = 0, i73 = 0, i74 = 0, i75 = 0, i76 = 0, i77 = 0, i78 = 0, i79 = 0;
+        int i80 = 0, i81 = 0, i82 = 0, i83 = 0, i84 = 0, i85 = 0, i86 = 0, i87 = 0, i88 = 0, i89 = 0;
+        int i90 = 0, i91 = 0, i92 = 0, i93 = 0, i94 = 0, i95 = 0, i96 = 0, i97 = 0, i98 = 0, i99 = 0;
+        int i100 = 0, i101 = 0, i102 = 0, i103 = 0, i104 = 0, i105 = 0, i106 = 0, i107 = 0, i108 = 0, i109 = 0;
+        int i110 = 0, i111 = 0, i112 = 0, i113 = 0, i114 = 0, i115 = 0, i116 = 0, i117 = 0, i118 = 0, i119 = 0;
+        int i120 = 0, i121 = 0, i122 = 0, i123 = 0, i124 = 0, i125 = 0, i126 = 0, i127 = 0, i128 = 0, i129 = 0;
+        int i130 = 0, i131 = 0, i132 = 0, i133 = 0, i134 = 0, i135 = 0, i136 = 0, i137 = 0, i138 = 0, i139 = 0;
+        int i140 = 0, i141 = 0, i142 = 0, i143 = 0, i144 = 0, i145 = 0, i146 = 0, i147 = 0, i148 = 0, i149 = 0;
+        int i150 = 0, i151 = 0, i152 = 0, i153 = 0, i154 = 0, i155 = 0, i156 = 0, i157 = 0, i158 = 0, i159 = 0;
+        int i160 = 0, i161 = 0, i162 = 0, i163 = 0, i164 = 0, i165 = 0, i166 = 0, i167 = 0, i168 = 0, i169 = 0;
+        int i170 = 0, i171 = 0, i172 = 0, i173 = 0, i174 = 0, i175 = 0, i176 = 0, i177 = 0, i178 = 0, i179 = 0;
+        int i180 = 0, i181 = 0, i182 = 0, i183 = 0, i184 = 0, i185 = 0, i186 = 0, i187 = 0, i188 = 0, i189 = 0;
+        int i190 = 0, i191 = 0, i192 = 0, i193 = 0, i194 = 0, i195 = 0, i196 = 0, i197 = 0, i198 = 0, i199 = 0;
+        int i200 = 0, i201 = 0, i202 = 0, i203 = 0, i204 = 0, i205 = 0, i206 = 0, i207 = 0, i208 = 0, i209 = 0;
+        int i210 = 0, i211 = 0, i212 = 0, i213 = 0, i214 = 0, i215 = 0, i216 = 0, i217 = 0, i218 = 0, i219 = 0;
+        int i220 = 0, i221 = 0, i222 = 0, i223 = 0, i224 = 0, i225 = 0, i226 = 0, i227 = 0, i228 = 0, i229 = 0;
+        int i230 = 0, i231 = 0, i232 = 0, i233 = 0, i234 = 0, i235 = 0, i236 = 0, i237 = 0, i238 = 0, i239 = 0;
+        int i240 = 0, i241 = 0, i242 = 0, i243 = 0, i244 = 0, i245 = 0, i246 = 0, i247 = 0, i248 = 0, i249 = 0;
+        int i250 = 0, i251 = 0, i252 = 0, i253 = 0, i254 = 0, i255 = 0, i256 = 0, i257 = 0, i258 = 0, i259 = 0;
+        int i260 = 0, i261 = 0, i262 = 0, i263 = 0, i264 = 0, i265 = 0, i266 = 0, i267 = 0, i268 = 0, i269 = 0;
+        int i270 = 0, i271 = 0, i272 = 0, i273 = 0, i274 = 0, i275 = 0, i276 = 0, i277 = 0, i278 = 0, i279 = 0;
+        int i280 = 0, i281 = 0, i282 = 0, i283 = 0, i284 = 0, i285 = 0, i286 = 0, i287 = 0, i288 = 0, i289 = 0;
+        int i290 = 0, i291 = 0, i292 = 0, i293 = 0, i294 = 0, i295 = 0, i296 = 0, i297 = 0, i298 = 0, i299 = 0;
+        int i300 = 0, i301 = 0, i302 = 0, i303 = 0, i304 = 0, i305 = 0, i306 = 0, i307 = 0, i308 = 0, i309 = 0;
+        int i310 = 0, i311 = 0, i312 = 0, i313 = 0, i314 = 0, i315 = 0, i316 = 0, i317 = 0, i318 = 0, i319 = 0;
+        int i320 = 0, i321 = 0, i322 = 0, i323 = 0, i324 = 0, i325 = 0, i326 = 0, i327 = 0, i328 = 0, i329 = 0;
+        int i330 = 0, i331 = 0, i332 = 0, i333 = 0, i334 = 0, i335 = 0, i336 = 0, i337 = 0, i338 = 0, i339 = 0;
+        int i340 = 0, i341 = 0, i342 = 0, i343 = 0, i344 = 0, i345 = 0, i346 = 0, i347 = 0, i348 = 0, i349 = 0;
+        int i350 = 0, i351 = 0, i352 = 0, i353 = 0, i354 = 0, i355 = 0, i356 = 0, i357 = 0, i358 = 0, i359 = 0;
+        int i360 = 0, i361 = 0, i362 = 0, i363 = 0, i364 = 0, i365 = 0, i366 = 0, i367 = 0, i368 = 0, i369 = 0;
+        int i370 = 0, i371 = 0, i372 = 0, i373 = 0, i374 = 0, i375 = 0, i376 = 0, i377 = 0, i378 = 0, i379 = 0;
+        int i380 = 0, i381 = 0, i382 = 0, i383 = 0, i384 = 0, i385 = 0, i386 = 0, i387 = 0, i388 = 0, i389 = 0;
+        int i390 = 0, i391 = 0, i392 = 0, i393 = 0, i394 = 0, i395 = 0, i396 = 0, i397 = 0, i398 = 0, i399 = 0;
+        int i400 = 0, i401 = 0, i402 = 0, i403 = 0, i404 = 0, i405 = 0, i406 = 0, i407 = 0, i408 = 0, i409 = 0;
+        int i410 = 0, i411 = 0, i412 = 0, i413 = 0, i414 = 0, i415 = 0, i416 = 0, i417 = 0, i418 = 0, i419 = 0;
+        int i420 = 0, i421 = 0, i422 = 0, i423 = 0, i424 = 0, i425 = 0, i426 = 0, i427 = 0, i428 = 0, i429 = 0;
+        int i430 = 0, i431 = 0, i432 = 0, i433 = 0, i434 = 0, i435 = 0, i436 = 0, i437 = 0, i438 = 0, i439 = 0;
+        int i440 = 0, i441 = 0, i442 = 0, i443 = 0, i444 = 0, i445 = 0, i446 = 0, i447 = 0, i448 = 0, i449 = 0;
+        int i450 = 0, i451 = 0, i452 = 0, i453 = 0, i454 = 0, i455 = 0, i456 = 0, i457 = 0, i458 = 0, i459 = 0;
+        int i460 = 0, i461 = 0, i462 = 0, i463 = 0, i464 = 0, i465 = 0, i466 = 0, i467 = 0, i468 = 0, i469 = 0;
+        int i470 = 0, i471 = 0, i472 = 0, i473 = 0, i474 = 0, i475 = 0, i476 = 0, i477 = 0, i478 = 0, i479 = 0;
+        int i480 = 0, i481 = 0, i482 = 0, i483 = 0, i484 = 0, i485 = 0, i486 = 0, i487 = 0, i488 = 0, i489 = 0;
+        int i490 = 0, i491 = 0, i492 = 0, i493 = 0, i494 = 0, i495 = 0, i496 = 0, i497 = 0, i498 = 0, i499 = 0;
+        int i500 = 0, i501 = 0, i502 = 0, i503 = 0, i504 = 0, i505 = 0, i506 = 0, i507 = 0, i508 = 0, i509 = 0;
+        int i510 = 0, i511 = 0, i512 = 0, i513 = 0, i514 = 0, i515 = 0, i516 = 0, i517 = 0, i518 = 0, i519 = 0;
+        int i520 = 0, i521 = 0, i522 = 0, i523 = 0, i524 = 0, i525 = 0, i526 = 0, i527 = 0, i528 = 0, i529 = 0;
+        int i530 = 0, i531 = 0, i532 = 0, i533 = 0, i534 = 0, i535 = 0, i536 = 0, i537 = 0, i538 = 0, i539 = 0;
+        int i540 = 0, i541 = 0, i542 = 0, i543 = 0, i544 = 0, i545 = 0, i546 = 0, i547 = 0, i548 = 0, i549 = 0;
+        int i550 = 0, i551 = 0, i552 = 0, i553 = 0, i554 = 0, i555 = 0, i556 = 0, i557 = 0, i558 = 0, i559 = 0;
+        int i560 = 0, i561 = 0, i562 = 0, i563 = 0, i564 = 0, i565 = 0, i566 = 0, i567 = 0, i568 = 0, i569 = 0;
+        int i570 = 0, i571 = 0, i572 = 0, i573 = 0, i574 = 0, i575 = 0, i576 = 0, i577 = 0, i578 = 0, i579 = 0;
+        int i580 = 0, i581 = 0, i582 = 0, i583 = 0, i584 = 0, i585 = 0, i586 = 0, i587 = 0, i588 = 0, i589 = 0;
+        int i590 = 0, i591 = 0, i592 = 0, i593 = 0, i594 = 0, i595 = 0, i596 = 0, i597 = 0, i598 = 0, i599 = 0;
+        int i600 = 0, i601 = 0, i602 = 0, i603 = 0, i604 = 0, i605 = 0, i606 = 0, i607 = 0, i608 = 0, i609 = 0;
+        int i610 = 0, i611 = 0, i612 = 0, i613 = 0, i614 = 0, i615 = 0, i616 = 0, i617 = 0, i618 = 0, i619 = 0;
+        int i620 = 0, i621 = 0, i622 = 0, i623 = 0, i624 = 0, i625 = 0, i626 = 0, i627 = 0, i628 = 0, i629 = 0;
+        int i630 = 0, i631 = 0, i632 = 0, i633 = 0, i634 = 0, i635 = 0, i636 = 0, i637 = 0, i638 = 0, i639 = 0;
+        int i640 = 0, i641 = 0, i642 = 0, i643 = 0, i644 = 0, i645 = 0, i646 = 0, i647 = 0, i648 = 0, i649 = 0;
+        int i650 = 0, i651 = 0, i652 = 0, i653 = 0, i654 = 0, i655 = 0, i656 = 0, i657 = 0, i658 = 0, i659 = 0;
+        int i660 = 0, i661 = 0, i662 = 0, i663 = 0, i664 = 0, i665 = 0, i666 = 0, i667 = 0, i668 = 0, i669 = 0;
+        int i670 = 0, i671 = 0, i672 = 0, i673 = 0, i674 = 0, i675 = 0, i676 = 0, i677 = 0, i678 = 0, i679 = 0;
+        int i680 = 0, i681 = 0, i682 = 0, i683 = 0, i684 = 0, i685 = 0, i686 = 0, i687 = 0, i688 = 0, i689 = 0;
+        int i690 = 0, i691 = 0, i692 = 0, i693 = 0, i694 = 0, i695 = 0, i696 = 0, i697 = 0, i698 = 0, i699 = 0;
+        int i700 = 0, i701 = 0, i702 = 0, i703 = 0, i704 = 0, i705 = 0, i706 = 0, i707 = 0, i708 = 0, i709 = 0;
+        int i710 = 0, i711 = 0, i712 = 0, i713 = 0, i714 = 0, i715 = 0, i716 = 0, i717 = 0, i718 = 0, i719 = 0;
+        int i720 = 0, i721 = 0, i722 = 0, i723 = 0, i724 = 0, i725 = 0, i726 = 0, i727 = 0, i728 = 0, i729 = 0;
+        int i730 = 0, i731 = 0, i732 = 0, i733 = 0, i734 = 0, i735 = 0, i736 = 0, i737 = 0, i738 = 0, i739 = 0;
+        int i740 = 0, i741 = 0, i742 = 0, i743 = 0, i744 = 0, i745 = 0, i746 = 0, i747 = 0, i748 = 0, i749 = 0;
+        int i750 = 0, i751 = 0, i752 = 0, i753 = 0, i754 = 0, i755 = 0, i756 = 0, i757 = 0, i758 = 0, i759 = 0;
+        int i760 = 0, i761 = 0, i762 = 0, i763 = 0, i764 = 0, i765 = 0, i766 = 0, i767 = 0, i768 = 0, i769 = 0;
+        int i770 = 0, i771 = 0, i772 = 0, i773 = 0, i774 = 0, i775 = 0, i776 = 0, i777 = 0, i778 = 0, i779 = 0;
+        int i780 = 0, i781 = 0, i782 = 0, i783 = 0, i784 = 0, i785 = 0, i786 = 0, i787 = 0, i788 = 0, i789 = 0;
+        int i790 = 0, i791 = 0, i792 = 0, i793 = 0, i794 = 0, i795 = 0, i796 = 0, i797 = 0, i798 = 0, i799 = 0;
+        int i800 = 0, i801 = 0, i802 = 0, i803 = 0, i804 = 0, i805 = 0, i806 = 0, i807 = 0, i808 = 0, i809 = 0;
+        int i810 = 0, i811 = 0, i812 = 0, i813 = 0, i814 = 0, i815 = 0, i816 = 0, i817 = 0, i818 = 0, i819 = 0;
+        int i820 = 0, i821 = 0, i822 = 0, i823 = 0, i824 = 0, i825 = 0, i826 = 0, i827 = 0, i828 = 0, i829 = 0;
+        int i830 = 0, i831 = 0, i832 = 0, i833 = 0, i834 = 0, i835 = 0, i836 = 0, i837 = 0, i838 = 0, i839 = 0;
+        int i840 = 0, i841 = 0, i842 = 0, i843 = 0, i844 = 0, i845 = 0, i846 = 0, i847 = 0, i848 = 0, i849 = 0;
+        int i850 = 0, i851 = 0, i852 = 0, i853 = 0, i854 = 0, i855 = 0, i856 = 0, i857 = 0, i858 = 0, i859 = 0;
+        int i860 = 0, i861 = 0, i862 = 0, i863 = 0, i864 = 0, i865 = 0, i866 = 0, i867 = 0, i868 = 0, i869 = 0;
+        int i870 = 0, i871 = 0, i872 = 0, i873 = 0, i874 = 0, i875 = 0, i876 = 0, i877 = 0, i878 = 0, i879 = 0;
+        int i880 = 0, i881 = 0, i882 = 0, i883 = 0, i884 = 0, i885 = 0, i886 = 0, i887 = 0, i888 = 0, i889 = 0;
+        int i890 = 0, i891 = 0, i892 = 0, i893 = 0, i894 = 0, i895 = 0, i896 = 0, i897 = 0, i898 = 0, i899 = 0;
+        int i900 = 0, i901 = 0, i902 = 0, i903 = 0, i904 = 0, i905 = 0, i906 = 0, i907 = 0, i908 = 0, i909 = 0;
+        int i910 = 0, i911 = 0, i912 = 0, i913 = 0, i914 = 0, i915 = 0, i916 = 0, i917 = 0, i918 = 0, i919 = 0;
+        int i920 = 0, i921 = 0, i922 = 0, i923 = 0, i924 = 0, i925 = 0, i926 = 0, i927 = 0, i928 = 0, i929 = 0;
+        int i930 = 0, i931 = 0, i932 = 0, i933 = 0, i934 = 0, i935 = 0, i936 = 0, i937 = 0, i938 = 0, i939 = 0;
+        int i940 = 0, i941 = 0, i942 = 0, i943 = 0, i944 = 0, i945 = 0, i946 = 0, i947 = 0, i948 = 0, i949 = 0;
+        int i950 = 0, i951 = 0, i952 = 0, i953 = 0, i954 = 0, i955 = 0, i956 = 0, i957 = 0, i958 = 0, i959 = 0;
+        int i960 = 0, i961 = 0, i962 = 0, i963 = 0, i964 = 0, i965 = 0, i966 = 0, i967 = 0, i968 = 0, i969 = 0;
+        int i970 = 0, i971 = 0, i972 = 0, i973 = 0, i974 = 0, i975 = 0, i976 = 0, i977 = 0, i978 = 0, i979 = 0;
+        int i980 = 0, i981 = 0, i982 = 0, i983 = 0, i984 = 0, i985 = 0, i986 = 0, i987 = 0, i988 = 0, i989 = 0;
+        int i990 = 0, i991 = 0, i992 = 0, i993 = 0, i994 = 0, i995 = 0, i996 = 0, i997 = 0, i998 = 0, i999 = 0;
+        int i1000 = 0, i1001 = 0, i1002 = 0, i1003 = 0, i1004 = 0, i1005 = 0, i1006 = 0, i1007 = 0, i1008 = 0, i1009 = 0;
+        int i1010 = 0, i1011 = 0, i1012 = 0, i1013 = 0, i1014 = 0, i1015 = 0, i1016 = 0, i1017 = 0, i1018 = 0, i1019 = 0;
+        int i1020 = 0, i1021 = 0, i1022 = 0, i1023 = 0, i1024 = 0, i1025 = 0, i1026 = 0, i1027 = 0, i1028 = 0, i1029 = 0;
+        int i1030 = 0, i1031 = 0, i1032 = 0, i1033 = 0, i1034 = 0, i1035 = 0, i1036 = 0, i1037 = 0, i1038 = 0, i1039 = 0;
+        int i1040 = 0, i1041 = 0, i1042 = 0, i1043 = 0, i1044 = 0, i1045 = 0, i1046 = 0, i1047 = 0, i1048 = 0, i1049 = 0;
+        int i1050 = 0, i1051 = 0, i1052 = 0, i1053 = 0, i1054 = 0, i1055 = 0, i1056 = 0, i1057 = 0, i1058 = 0, i1059 = 0;
+        int i1060 = 0, i1061 = 0, i1062 = 0, i1063 = 0, i1064 = 0, i1065 = 0, i1066 = 0, i1067 = 0, i1068 = 0, i1069 = 0;
+        int i1070 = 0, i1071 = 0, i1072 = 0, i1073 = 0, i1074 = 0, i1075 = 0, i1076 = 0, i1077 = 0, i1078 = 0, i1079 = 0;
+        int i1080 = 0, i1081 = 0, i1082 = 0, i1083 = 0, i1084 = 0, i1085 = 0, i1086 = 0, i1087 = 0, i1088 = 0, i1089 = 0;
+        int i1090 = 0, i1091 = 0, i1092 = 0, i1093 = 0, i1094 = 0, i1095 = 0, i1096 = 0, i1097 = 0, i1098 = 0, i1099 = 0;
+        int i1100 = 0, i1101 = 0, i1102 = 0, i1103 = 0, i1104 = 0, i1105 = 0, i1106 = 0, i1107 = 0, i1108 = 0, i1109 = 0;
+        int i1110 = 0, i1111 = 0, i1112 = 0, i1113 = 0, i1114 = 0, i1115 = 0, i1116 = 0, i1117 = 0, i1118 = 0, i1119 = 0;
+        int i1120 = 0, i1121 = 0, i1122 = 0, i1123 = 0, i1124 = 0, i1125 = 0, i1126 = 0, i1127 = 0, i1128 = 0, i1129 = 0;
+        int i1130 = 0, i1131 = 0, i1132 = 0, i1133 = 0, i1134 = 0, i1135 = 0, i1136 = 0, i1137 = 0, i1138 = 0, i1139 = 0;
+        int i1140 = 0, i1141 = 0, i1142 = 0, i1143 = 0, i1144 = 0, i1145 = 0, i1146 = 0, i1147 = 0, i1148 = 0, i1149 = 0;
+        int i1150 = 0, i1151 = 0, i1152 = 0, i1153 = 0, i1154 = 0, i1155 = 0, i1156 = 0, i1157 = 0, i1158 = 0, i1159 = 0;
+        int i1160 = 0, i1161 = 0, i1162 = 0, i1163 = 0, i1164 = 0, i1165 = 0, i1166 = 0, i1167 = 0, i1168 = 0, i1169 = 0;
+        int i1170 = 0, i1171 = 0, i1172 = 0, i1173 = 0, i1174 = 0, i1175 = 0, i1176 = 0, i1177 = 0, i1178 = 0, i1179 = 0;
+        int i1180 = 0, i1181 = 0, i1182 = 0, i1183 = 0, i1184 = 0, i1185 = 0, i1186 = 0, i1187 = 0, i1188 = 0, i1189 = 0;
+        int i1190 = 0, i1191 = 0, i1192 = 0, i1193 = 0, i1194 = 0, i1195 = 0, i1196 = 0, i1197 = 0, i1198 = 0, i1199 = 0;
+        int i1200 = 0, i1201 = 0, i1202 = 0, i1203 = 0, i1204 = 0, i1205 = 0, i1206 = 0, i1207 = 0, i1208 = 0, i1209 = 0;
+        int i1210 = 0, i1211 = 0, i1212 = 0, i1213 = 0, i1214 = 0, i1215 = 0, i1216 = 0, i1217 = 0, i1218 = 0, i1219 = 0;
+        int i1220 = 0, i1221 = 0, i1222 = 0, i1223 = 0, i1224 = 0, i1225 = 0, i1226 = 0, i1227 = 0, i1228 = 0, i1229 = 0;
+        int i1230 = 0, i1231 = 0, i1232 = 0, i1233 = 0, i1234 = 0, i1235 = 0, i1236 = 0, i1237 = 0, i1238 = 0, i1239 = 0;
+        int i1240 = 0, i1241 = 0, i1242 = 0, i1243 = 0, i1244 = 0, i1245 = 0, i1246 = 0, i1247 = 0, i1248 = 0, i1249 = 0;
+        int i1250 = 0, i1251 = 0, i1252 = 0, i1253 = 0, i1254 = 0, i1255 = 0, i1256 = 0, i1257 = 0, i1258 = 0, i1259 = 0;
+        int i1260 = 0, i1261 = 0, i1262 = 0, i1263 = 0, i1264 = 0, i1265 = 0, i1266 = 0, i1267 = 0, i1268 = 0, i1269 = 0;
+        int i1270 = 0, i1271 = 0, i1272 = 0, i1273 = 0, i1274 = 0, i1275 = 0, i1276 = 0, i1277 = 0, i1278 = 0, i1279 = 0;
+        int i1280 = 0, i1281 = 0, i1282 = 0, i1283 = 0, i1284 = 0, i1285 = 0, i1286 = 0, i1287 = 0, i1288 = 0, i1289 = 0;
+        int i1290 = 0, i1291 = 0, i1292 = 0, i1293 = 0, i1294 = 0, i1295 = 0, i1296 = 0, i1297 = 0, i1298 = 0, i1299 = 0;
+        int i1300 = 0, i1301 = 0, i1302 = 0, i1303 = 0, i1304 = 0, i1305 = 0, i1306 = 0, i1307 = 0, i1308 = 0, i1309 = 0;
+        int i1310 = 0, i1311 = 0, i1312 = 0, i1313 = 0, i1314 = 0, i1315 = 0, i1316 = 0, i1317 = 0, i1318 = 0, i1319 = 0;
+        int i1320 = 0, i1321 = 0, i1322 = 0, i1323 = 0, i1324 = 0, i1325 = 0, i1326 = 0, i1327 = 0, i1328 = 0, i1329 = 0;
+        int i1330 = 0, i1331 = 0, i1332 = 0, i1333 = 0, i1334 = 0, i1335 = 0, i1336 = 0, i1337 = 0, i1338 = 0, i1339 = 0;
+        int i1340 = 0, i1341 = 0, i1342 = 0, i1343 = 0, i1344 = 0, i1345 = 0, i1346 = 0, i1347 = 0, i1348 = 0, i1349 = 0;
+        int i1350 = 0, i1351 = 0, i1352 = 0, i1353 = 0, i1354 = 0, i1355 = 0, i1356 = 0, i1357 = 0, i1358 = 0, i1359 = 0;
+        int i1360 = 0, i1361 = 0, i1362 = 0, i1363 = 0, i1364 = 0, i1365 = 0, i1366 = 0, i1367 = 0, i1368 = 0, i1369 = 0;
+        int i1370 = 0, i1371 = 0, i1372 = 0, i1373 = 0, i1374 = 0, i1375 = 0, i1376 = 0, i1377 = 0, i1378 = 0, i1379 = 0;
+        int i1380 = 0, i1381 = 0, i1382 = 0, i1383 = 0, i1384 = 0, i1385 = 0, i1386 = 0, i1387 = 0, i1388 = 0, i1389 = 0;
+        int i1390 = 0, i1391 = 0, i1392 = 0, i1393 = 0, i1394 = 0, i1395 = 0, i1396 = 0, i1397 = 0, i1398 = 0, i1399 = 0;
+        int i1400 = 0, i1401 = 0, i1402 = 0, i1403 = 0, i1404 = 0, i1405 = 0, i1406 = 0, i1407 = 0, i1408 = 0, i1409 = 0;
+        int i1410 = 0, i1411 = 0, i1412 = 0, i1413 = 0, i1414 = 0, i1415 = 0, i1416 = 0, i1417 = 0, i1418 = 0, i1419 = 0;
+        int i1420 = 0, i1421 = 0, i1422 = 0, i1423 = 0, i1424 = 0, i1425 = 0, i1426 = 0, i1427 = 0, i1428 = 0, i1429 = 0;
+        int i1430 = 0, i1431 = 0, i1432 = 0, i1433 = 0, i1434 = 0, i1435 = 0, i1436 = 0, i1437 = 0, i1438 = 0, i1439 = 0;
+        int i1440 = 0, i1441 = 0, i1442 = 0, i1443 = 0, i1444 = 0, i1445 = 0, i1446 = 0, i1447 = 0, i1448 = 0, i1449 = 0;
+        int i1450 = 0, i1451 = 0, i1452 = 0, i1453 = 0, i1454 = 0, i1455 = 0, i1456 = 0, i1457 = 0, i1458 = 0, i1459 = 0;
+        int i1460 = 0, i1461 = 0, i1462 = 0, i1463 = 0, i1464 = 0, i1465 = 0, i1466 = 0, i1467 = 0, i1468 = 0, i1469 = 0;
+        int i1470 = 0, i1471 = 0, i1472 = 0, i1473 = 0, i1474 = 0, i1475 = 0, i1476 = 0, i1477 = 0, i1478 = 0, i1479 = 0;
+        int i1480 = 0, i1481 = 0, i1482 = 0, i1483 = 0, i1484 = 0, i1485 = 0, i1486 = 0, i1487 = 0, i1488 = 0, i1489 = 0;
+        int i1490 = 0, i1491 = 0, i1492 = 0, i1493 = 0, i1494 = 0, i1495 = 0, i1496 = 0, i1497 = 0, i1498 = 0, i1499 = 0;
+        int i1500 = 0, i1501 = 0, i1502 = 0, i1503 = 0, i1504 = 0, i1505 = 0, i1506 = 0, i1507 = 0, i1508 = 0, i1509 = 0;
+        int i1510 = 0, i1511 = 0, i1512 = 0, i1513 = 0, i1514 = 0, i1515 = 0, i1516 = 0, i1517 = 0, i1518 = 0, i1519 = 0;
+        int i1520 = 0, i1521 = 0, i1522 = 0, i1523 = 0, i1524 = 0, i1525 = 0, i1526 = 0, i1527 = 0, i1528 = 0, i1529 = 0;
+        int i1530 = 0, i1531 = 0, i1532 = 0, i1533 = 0, i1534 = 0, i1535 = 0, i1536 = 0, i1537 = 0, i1538 = 0, i1539 = 0;
+        int i1540 = 0, i1541 = 0, i1542 = 0, i1543 = 0, i1544 = 0, i1545 = 0, i1546 = 0, i1547 = 0, i1548 = 0, i1549 = 0;
+        int i1550 = 0, i1551 = 0, i1552 = 0, i1553 = 0, i1554 = 0, i1555 = 0, i1556 = 0, i1557 = 0, i1558 = 0, i1559 = 0;
+        int i1560 = 0, i1561 = 0, i1562 = 0, i1563 = 0, i1564 = 0, i1565 = 0, i1566 = 0, i1567 = 0, i1568 = 0, i1569 = 0;
+        int i1570 = 0, i1571 = 0, i1572 = 0, i1573 = 0, i1574 = 0, i1575 = 0, i1576 = 0, i1577 = 0, i1578 = 0, i1579 = 0;
+        int i1580 = 0, i1581 = 0, i1582 = 0, i1583 = 0, i1584 = 0, i1585 = 0, i1586 = 0, i1587 = 0, i1588 = 0, i1589 = 0;
+        int i1590 = 0, i1591 = 0, i1592 = 0, i1593 = 0, i1594 = 0, i1595 = 0, i1596 = 0, i1597 = 0, i1598 = 0, i1599 = 0;
+        int i1600 = 0, i1601 = 0, i1602 = 0, i1603 = 0, i1604 = 0, i1605 = 0, i1606 = 0, i1607 = 0, i1608 = 0, i1609 = 0;
+        int i1610 = 0, i1611 = 0, i1612 = 0, i1613 = 0, i1614 = 0, i1615 = 0, i1616 = 0, i1617 = 0, i1618 = 0, i1619 = 0;
+        int i1620 = 0, i1621 = 0, i1622 = 0, i1623 = 0, i1624 = 0, i1625 = 0, i1626 = 0, i1627 = 0, i1628 = 0, i1629 = 0;
+        int i1630 = 0, i1631 = 0, i1632 = 0, i1633 = 0, i1634 = 0, i1635 = 0, i1636 = 0, i1637 = 0, i1638 = 0, i1639 = 0;
+        int i1640 = 0, i1641 = 0, i1642 = 0, i1643 = 0, i1644 = 0, i1645 = 0, i1646 = 0, i1647 = 0, i1648 = 0, i1649 = 0;
+        int i1650 = 0, i1651 = 0, i1652 = 0, i1653 = 0, i1654 = 0, i1655 = 0, i1656 = 0, i1657 = 0, i1658 = 0, i1659 = 0;
+        int i1660 = 0, i1661 = 0, i1662 = 0, i1663 = 0, i1664 = 0, i1665 = 0, i1666 = 0, i1667 = 0, i1668 = 0, i1669 = 0;
+        int i1670 = 0, i1671 = 0, i1672 = 0, i1673 = 0, i1674 = 0, i1675 = 0, i1676 = 0, i1677 = 0, i1678 = 0, i1679 = 0;
+        int i1680 = 0, i1681 = 0, i1682 = 0, i1683 = 0, i1684 = 0, i1685 = 0, i1686 = 0, i1687 = 0, i1688 = 0, i1689 = 0;
+        int i1690 = 0, i1691 = 0, i1692 = 0, i1693 = 0, i1694 = 0, i1695 = 0, i1696 = 0, i1697 = 0, i1698 = 0, i1699 = 0;
+        int i1700 = 0, i1701 = 0, i1702 = 0, i1703 = 0, i1704 = 0, i1705 = 0, i1706 = 0, i1707 = 0, i1708 = 0, i1709 = 0;
+        int i1710 = 0, i1711 = 0, i1712 = 0, i1713 = 0, i1714 = 0, i1715 = 0, i1716 = 0, i1717 = 0, i1718 = 0, i1719 = 0;
+        int i1720 = 0, i1721 = 0, i1722 = 0, i1723 = 0, i1724 = 0, i1725 = 0, i1726 = 0, i1727 = 0, i1728 = 0, i1729 = 0;
+        int i1730 = 0, i1731 = 0, i1732 = 0, i1733 = 0, i1734 = 0, i1735 = 0, i1736 = 0, i1737 = 0, i1738 = 0, i1739 = 0;
+        int i1740 = 0, i1741 = 0, i1742 = 0, i1743 = 0, i1744 = 0, i1745 = 0, i1746 = 0, i1747 = 0, i1748 = 0, i1749 = 0;
+        int i1750 = 0, i1751 = 0, i1752 = 0, i1753 = 0, i1754 = 0, i1755 = 0, i1756 = 0, i1757 = 0, i1758 = 0, i1759 = 0;
+        int i1760 = 0, i1761 = 0, i1762 = 0, i1763 = 0, i1764 = 0, i1765 = 0, i1766 = 0, i1767 = 0, i1768 = 0, i1769 = 0;
+        int i1770 = 0, i1771 = 0, i1772 = 0, i1773 = 0, i1774 = 0, i1775 = 0, i1776 = 0, i1777 = 0, i1778 = 0, i1779 = 0;
+        int i1780 = 0, i1781 = 0, i1782 = 0, i1783 = 0, i1784 = 0, i1785 = 0, i1786 = 0, i1787 = 0, i1788 = 0, i1789 = 0;
+        int i1790 = 0, i1791 = 0, i1792 = 0, i1793 = 0, i1794 = 0, i1795 = 0, i1796 = 0, i1797 = 0, i1798 = 0, i1799 = 0;
+        int i1800 = 0, i1801 = 0, i1802 = 0, i1803 = 0, i1804 = 0, i1805 = 0, i1806 = 0, i1807 = 0, i1808 = 0, i1809 = 0;
+        int i1810 = 0, i1811 = 0, i1812 = 0, i1813 = 0, i1814 = 0, i1815 = 0, i1816 = 0, i1817 = 0, i1818 = 0, i1819 = 0;
+        int i1820 = 0, i1821 = 0, i1822 = 0, i1823 = 0, i1824 = 0, i1825 = 0, i1826 = 0, i1827 = 0, i1828 = 0, i1829 = 0;
+        int i1830 = 0, i1831 = 0, i1832 = 0, i1833 = 0, i1834 = 0, i1835 = 0, i1836 = 0, i1837 = 0, i1838 = 0, i1839 = 0;
+        int i1840 = 0, i1841 = 0, i1842 = 0, i1843 = 0, i1844 = 0, i1845 = 0, i1846 = 0, i1847 = 0, i1848 = 0, i1849 = 0;
+        int i1850 = 0, i1851 = 0, i1852 = 0, i1853 = 0, i1854 = 0, i1855 = 0, i1856 = 0, i1857 = 0, i1858 = 0, i1859 = 0;
+        int i1860 = 0, i1861 = 0, i1862 = 0, i1863 = 0, i1864 = 0, i1865 = 0, i1866 = 0, i1867 = 0, i1868 = 0, i1869 = 0;
+        int i1870 = 0, i1871 = 0, i1872 = 0, i1873 = 0, i1874 = 0, i1875 = 0, i1876 = 0, i1877 = 0, i1878 = 0, i1879 = 0;
+        int i1880 = 0, i1881 = 0, i1882 = 0, i1883 = 0, i1884 = 0, i1885 = 0, i1886 = 0, i1887 = 0, i1888 = 0, i1889 = 0;
+        int i1890 = 0, i1891 = 0, i1892 = 0, i1893 = 0, i1894 = 0, i1895 = 0, i1896 = 0, i1897 = 0, i1898 = 0, i1899 = 0;
+        int i1900 = 0, i1901 = 0, i1902 = 0, i1903 = 0, i1904 = 0, i1905 = 0, i1906 = 0, i1907 = 0, i1908 = 0, i1909 = 0;
+        int i1910 = 0, i1911 = 0, i1912 = 0, i1913 = 0, i1914 = 0, i1915 = 0, i1916 = 0, i1917 = 0, i1918 = 0, i1919 = 0;
+        int i1920 = 0, i1921 = 0, i1922 = 0, i1923 = 0, i1924 = 0, i1925 = 0, i1926 = 0, i1927 = 0, i1928 = 0, i1929 = 0;
+        int i1930 = 0, i1931 = 0, i1932 = 0, i1933 = 0, i1934 = 0, i1935 = 0, i1936 = 0, i1937 = 0, i1938 = 0, i1939 = 0;
+        int i1940 = 0, i1941 = 0, i1942 = 0, i1943 = 0, i1944 = 0, i1945 = 0, i1946 = 0, i1947 = 0, i1948 = 0, i1949 = 0;
+        int i1950 = 0, i1951 = 0, i1952 = 0, i1953 = 0, i1954 = 0, i1955 = 0, i1956 = 0, i1957 = 0, i1958 = 0, i1959 = 0;
+        int i1960 = 0, i1961 = 0, i1962 = 0, i1963 = 0, i1964 = 0, i1965 = 0, i1966 = 0, i1967 = 0, i1968 = 0, i1969 = 0;
+        int i1970 = 0, i1971 = 0, i1972 = 0, i1973 = 0, i1974 = 0, i1975 = 0, i1976 = 0, i1977 = 0, i1978 = 0, i1979 = 0;
+        int i1980 = 0, i1981 = 0, i1982 = 0, i1983 = 0, i1984 = 0, i1985 = 0, i1986 = 0, i1987 = 0, i1988 = 0, i1989 = 0;
+        int i1990 = 0, i1991 = 0, i1992 = 0, i1993 = 0, i1994 = 0, i1995 = 0, i1996 = 0, i1997 = 0, i1998 = 0, i1999 = 0;
+        int i2000 = 0, i2001 = 0, i2002 = 0, i2003 = 0, i2004 = 0, i2005 = 0, i2006 = 0, i2007 = 0, i2008 = 0, i2009 = 0;
+        int i2010 = 0, i2011 = 0, i2012 = 0, i2013 = 0, i2014 = 0, i2015 = 0, i2016 = 0, i2017 = 0, i2018 = 0, i2019 = 0;
+        int i2020 = 0, i2021 = 0, i2022 = 0, i2023 = 0, i2024 = 0, i2025 = 0, i2026 = 0, i2027 = 0, i2028 = 0, i2029 = 0;
+        int i2030 = 0, i2031 = 0, i2032 = 0, i2033 = 0, i2034 = 0, i2035 = 0, i2036 = 0, i2037 = 0, i2038 = 0, i2039 = 0;
+        int i2040 = 0, i2041 = 0, i2042 = 0, i2043 = 0, i2044 = 0, i2045 = 0, i2046 = 0, i2047 = 0, i2048 = 0, i2049 = 0;
+        int i2050 = 0, i2051 = 0, i2052 = 0, i2053 = 0, i2054 = 0, i2055 = 0, i2056 = 0, i2057 = 0, i2058 = 0, i2059 = 0;
+        int i2060 = 0, i2061 = 0, i2062 = 0, i2063 = 0, i2064 = 0, i2065 = 0, i2066 = 0, i2067 = 0, i2068 = 0, i2069 = 0;
+        int i2070 = 0, i2071 = 0, i2072 = 0, i2073 = 0, i2074 = 0, i2075 = 0, i2076 = 0, i2077 = 0, i2078 = 0, i2079 = 0;
+        int i2080 = 0, i2081 = 0, i2082 = 0, i2083 = 0, i2084 = 0, i2085 = 0, i2086 = 0, i2087 = 0, i2088 = 0, i2089 = 0;
+        int i2090 = 0, i2091 = 0, i2092 = 0, i2093 = 0, i2094 = 0, i2095 = 0, i2096 = 0, i2097 = 0, i2098 = 0, i2099 = 0;
+        int i2100 = 0, i2101 = 0, i2102 = 0, i2103 = 0, i2104 = 0, i2105 = 0, i2106 = 0, i2107 = 0, i2108 = 0, i2109 = 0;
+        int i2110 = 0, i2111 = 0, i2112 = 0, i2113 = 0, i2114 = 0, i2115 = 0, i2116 = 0, i2117 = 0, i2118 = 0, i2119 = 0;
+        int i2120 = 0, i2121 = 0, i2122 = 0, i2123 = 0, i2124 = 0, i2125 = 0, i2126 = 0, i2127 = 0, i2128 = 0, i2129 = 0;
+        int i2130 = 0, i2131 = 0, i2132 = 0, i2133 = 0, i2134 = 0, i2135 = 0, i2136 = 0, i2137 = 0, i2138 = 0, i2139 = 0;
+        int i2140 = 0, i2141 = 0, i2142 = 0, i2143 = 0, i2144 = 0, i2145 = 0, i2146 = 0, i2147 = 0, i2148 = 0, i2149 = 0;
+        int i2150 = 0, i2151 = 0, i2152 = 0, i2153 = 0, i2154 = 0, i2155 = 0, i2156 = 0, i2157 = 0, i2158 = 0, i2159 = 0;
+        int i2160 = 0, i2161 = 0, i2162 = 0, i2163 = 0, i2164 = 0, i2165 = 0, i2166 = 0, i2167 = 0, i2168 = 0, i2169 = 0;
+        int i2170 = 0, i2171 = 0, i2172 = 0, i2173 = 0, i2174 = 0, i2175 = 0, i2176 = 0, i2177 = 0, i2178 = 0, i2179 = 0;
+        int i2180 = 0, i2181 = 0, i2182 = 0, i2183 = 0, i2184 = 0, i2185 = 0, i2186 = 0, i2187 = 0, i2188 = 0, i2189 = 0;
+        int i2190 = 0, i2191 = 0, i2192 = 0, i2193 = 0, i2194 = 0, i2195 = 0, i2196 = 0, i2197 = 0, i2198 = 0, i2199 = 0;
+        int i2200 = 0, i2201 = 0, i2202 = 0, i2203 = 0, i2204 = 0, i2205 = 0, i2206 = 0, i2207 = 0, i2208 = 0, i2209 = 0;
+        int i2210 = 0, i2211 = 0, i2212 = 0, i2213 = 0, i2214 = 0, i2215 = 0, i2216 = 0, i2217 = 0, i2218 = 0, i2219 = 0;
+        int i2220 = 0, i2221 = 0, i2222 = 0, i2223 = 0, i2224 = 0, i2225 = 0, i2226 = 0, i2227 = 0, i2228 = 0, i2229 = 0;
+        int i2230 = 0, i2231 = 0, i2232 = 0, i2233 = 0, i2234 = 0, i2235 = 0, i2236 = 0, i2237 = 0, i2238 = 0, i2239 = 0;
+        int i2240 = 0, i2241 = 0, i2242 = 0, i2243 = 0, i2244 = 0, i2245 = 0, i2246 = 0, i2247 = 0, i2248 = 0, i2249 = 0;
+        int i2250 = 0, i2251 = 0, i2252 = 0, i2253 = 0, i2254 = 0, i2255 = 0, i2256 = 0, i2257 = 0, i2258 = 0, i2259 = 0;
+        int i2260 = 0, i2261 = 0, i2262 = 0, i2263 = 0, i2264 = 0, i2265 = 0, i2266 = 0, i2267 = 0, i2268 = 0, i2269 = 0;
+        int i2270 = 0, i2271 = 0, i2272 = 0, i2273 = 0, i2274 = 0, i2275 = 0, i2276 = 0, i2277 = 0, i2278 = 0, i2279 = 0;
+        int i2280 = 0, i2281 = 0, i2282 = 0, i2283 = 0, i2284 = 0, i2285 = 0, i2286 = 0, i2287 = 0, i2288 = 0, i2289 = 0;
+        int i2290 = 0, i2291 = 0, i2292 = 0, i2293 = 0, i2294 = 0, i2295 = 0, i2296 = 0, i2297 = 0, i2298 = 0, i2299 = 0;
+        int i2300 = 0, i2301 = 0, i2302 = 0, i2303 = 0, i2304 = 0, i2305 = 0, i2306 = 0, i2307 = 0, i2308 = 0, i2309 = 0;
+        int i2310 = 0, i2311 = 0, i2312 = 0, i2313 = 0, i2314 = 0, i2315 = 0, i2316 = 0, i2317 = 0, i2318 = 0, i2319 = 0;
+        int i2320 = 0, i2321 = 0, i2322 = 0, i2323 = 0, i2324 = 0, i2325 = 0, i2326 = 0, i2327 = 0, i2328 = 0, i2329 = 0;
+        int i2330 = 0, i2331 = 0, i2332 = 0, i2333 = 0, i2334 = 0, i2335 = 0, i2336 = 0, i2337 = 0, i2338 = 0, i2339 = 0;
+        int i2340 = 0, i2341 = 0, i2342 = 0, i2343 = 0, i2344 = 0, i2345 = 0, i2346 = 0, i2347 = 0, i2348 = 0, i2349 = 0;
+        int i2350 = 0, i2351 = 0, i2352 = 0, i2353 = 0, i2354 = 0, i2355 = 0, i2356 = 0, i2357 = 0, i2358 = 0, i2359 = 0;
+        int i2360 = 0, i2361 = 0, i2362 = 0, i2363 = 0, i2364 = 0, i2365 = 0, i2366 = 0, i2367 = 0, i2368 = 0, i2369 = 0;
+        int i2370 = 0, i2371 = 0, i2372 = 0, i2373 = 0, i2374 = 0, i2375 = 0, i2376 = 0, i2377 = 0, i2378 = 0, i2379 = 0;
+        int i2380 = 0, i2381 = 0, i2382 = 0, i2383 = 0, i2384 = 0, i2385 = 0, i2386 = 0, i2387 = 0, i2388 = 0, i2389 = 0;
+        int i2390 = 0, i2391 = 0, i2392 = 0, i2393 = 0, i2394 = 0, i2395 = 0, i2396 = 0, i2397 = 0, i2398 = 0, i2399 = 0;
+        int i2400 = 0, i2401 = 0, i2402 = 0, i2403 = 0, i2404 = 0, i2405 = 0, i2406 = 0, i2407 = 0, i2408 = 0, i2409 = 0;
+        int i2410 = 0, i2411 = 0, i2412 = 0, i2413 = 0, i2414 = 0, i2415 = 0, i2416 = 0, i2417 = 0, i2418 = 0, i2419 = 0;
+        int i2420 = 0, i2421 = 0, i2422 = 0, i2423 = 0, i2424 = 0, i2425 = 0, i2426 = 0, i2427 = 0, i2428 = 0, i2429 = 0;
+        int i2430 = 0, i2431 = 0, i2432 = 0, i2433 = 0, i2434 = 0, i2435 = 0, i2436 = 0, i2437 = 0, i2438 = 0, i2439 = 0;
+        int i2440 = 0, i2441 = 0, i2442 = 0, i2443 = 0, i2444 = 0, i2445 = 0, i2446 = 0, i2447 = 0, i2448 = 0, i2449 = 0;
+        int i2450 = 0, i2451 = 0, i2452 = 0, i2453 = 0, i2454 = 0, i2455 = 0, i2456 = 0, i2457 = 0, i2458 = 0, i2459 = 0;
+        int i2460 = 0, i2461 = 0, i2462 = 0, i2463 = 0, i2464 = 0, i2465 = 0, i2466 = 0, i2467 = 0, i2468 = 0, i2469 = 0;
+        int i2470 = 0, i2471 = 0, i2472 = 0, i2473 = 0, i2474 = 0, i2475 = 0, i2476 = 0, i2477 = 0, i2478 = 0, i2479 = 0;
+        int i2480 = 0, i2481 = 0, i2482 = 0, i2483 = 0, i2484 = 0, i2485 = 0, i2486 = 0, i2487 = 0, i2488 = 0, i2489 = 0;
+        int i2490 = 0, i2491 = 0, i2492 = 0, i2493 = 0, i2494 = 0, i2495 = 0, i2496 = 0, i2497 = 0, i2498 = 0, i2499 = 0;
+        int i2500 = 0, i2501 = 0, i2502 = 0, i2503 = 0, i2504 = 0, i2505 = 0, i2506 = 0, i2507 = 0, i2508 = 0, i2509 = 0;
+        int i2510 = 0, i2511 = 0, i2512 = 0, i2513 = 0, i2514 = 0, i2515 = 0, i2516 = 0, i2517 = 0, i2518 = 0, i2519 = 0;
+        int i2520 = 0, i2521 = 0, i2522 = 0, i2523 = 0, i2524 = 0, i2525 = 0, i2526 = 0, i2527 = 0, i2528 = 0, i2529 = 0;
+        int i2530 = 0, i2531 = 0, i2532 = 0, i2533 = 0, i2534 = 0, i2535 = 0, i2536 = 0, i2537 = 0, i2538 = 0, i2539 = 0;
+        int i2540 = 0, i2541 = 0, i2542 = 0, i2543 = 0, i2544 = 0, i2545 = 0, i2546 = 0, i2547 = 0, i2548 = 0, i2549 = 0;
+        int i2550 = 0, i2551 = 0, i2552 = 0, i2553 = 0, i2554 = 0, i2555 = 0, i2556 = 0, i2557 = 0, i2558 = 0, i2559 = 0;
+        int i2560 = 0, i2561 = 0, i2562 = 0, i2563 = 0, i2564 = 0, i2565 = 0, i2566 = 0, i2567 = 0, i2568 = 0, i2569 = 0;
+        int i2570 = 0, i2571 = 0, i2572 = 0, i2573 = 0, i2574 = 0, i2575 = 0, i2576 = 0, i2577 = 0, i2578 = 0, i2579 = 0;
+        int i2580 = 0, i2581 = 0, i2582 = 0, i2583 = 0, i2584 = 0, i2585 = 0, i2586 = 0, i2587 = 0, i2588 = 0, i2589 = 0;
+        int i2590 = 0, i2591 = 0, i2592 = 0, i2593 = 0, i2594 = 0, i2595 = 0, i2596 = 0, i2597 = 0, i2598 = 0, i2599 = 0;
+        int i2600 = 0, i2601 = 0, i2602 = 0, i2603 = 0, i2604 = 0, i2605 = 0, i2606 = 0, i2607 = 0, i2608 = 0, i2609 = 0;
+        int i2610 = 0, i2611 = 0, i2612 = 0, i2613 = 0, i2614 = 0, i2615 = 0, i2616 = 0, i2617 = 0, i2618 = 0, i2619 = 0;
+        int i2620 = 0, i2621 = 0, i2622 = 0, i2623 = 0, i2624 = 0, i2625 = 0, i2626 = 0, i2627 = 0, i2628 = 0, i2629 = 0;
+        int i2630 = 0, i2631 = 0, i2632 = 0, i2633 = 0, i2634 = 0, i2635 = 0, i2636 = 0, i2637 = 0, i2638 = 0, i2639 = 0;
+        int i2640 = 0, i2641 = 0, i2642 = 0, i2643 = 0, i2644 = 0, i2645 = 0, i2646 = 0, i2647 = 0, i2648 = 0, i2649 = 0;
+        int i2650 = 0, i2651 = 0, i2652 = 0, i2653 = 0, i2654 = 0, i2655 = 0, i2656 = 0, i2657 = 0, i2658 = 0, i2659 = 0;
+        int i2660 = 0, i2661 = 0, i2662 = 0, i2663 = 0, i2664 = 0, i2665 = 0, i2666 = 0, i2667 = 0, i2668 = 0, i2669 = 0;
+        int i2670 = 0, i2671 = 0, i2672 = 0, i2673 = 0, i2674 = 0, i2675 = 0, i2676 = 0, i2677 = 0, i2678 = 0, i2679 = 0;
+        int i2680 = 0, i2681 = 0, i2682 = 0, i2683 = 0, i2684 = 0, i2685 = 0, i2686 = 0, i2687 = 0, i2688 = 0, i2689 = 0;
+        int i2690 = 0, i2691 = 0, i2692 = 0, i2693 = 0, i2694 = 0, i2695 = 0, i2696 = 0, i2697 = 0, i2698 = 0, i2699 = 0;
+        int i2700 = 0, i2701 = 0, i2702 = 0, i2703 = 0, i2704 = 0, i2705 = 0, i2706 = 0, i2707 = 0, i2708 = 0, i2709 = 0;
+        int i2710 = 0, i2711 = 0, i2712 = 0, i2713 = 0, i2714 = 0, i2715 = 0, i2716 = 0, i2717 = 0, i2718 = 0, i2719 = 0;
+        int i2720 = 0, i2721 = 0, i2722 = 0, i2723 = 0, i2724 = 0, i2725 = 0, i2726 = 0, i2727 = 0, i2728 = 0, i2729 = 0;
+        int i2730 = 0, i2731 = 0, i2732 = 0, i2733 = 0, i2734 = 0, i2735 = 0, i2736 = 0, i2737 = 0, i2738 = 0, i2739 = 0;
+        int i2740 = 0, i2741 = 0, i2742 = 0, i2743 = 0, i2744 = 0, i2745 = 0, i2746 = 0, i2747 = 0, i2748 = 0, i2749 = 0;
+        int i2750 = 0, i2751 = 0, i2752 = 0, i2753 = 0, i2754 = 0, i2755 = 0, i2756 = 0, i2757 = 0, i2758 = 0, i2759 = 0;
+        int i2760 = 0, i2761 = 0, i2762 = 0, i2763 = 0, i2764 = 0, i2765 = 0, i2766 = 0, i2767 = 0, i2768 = 0, i2769 = 0;
+        int i2770 = 0, i2771 = 0, i2772 = 0, i2773 = 0, i2774 = 0, i2775 = 0, i2776 = 0, i2777 = 0, i2778 = 0, i2779 = 0;
+        int i2780 = 0, i2781 = 0, i2782 = 0, i2783 = 0, i2784 = 0, i2785 = 0, i2786 = 0, i2787 = 0, i2788 = 0, i2789 = 0;
+        int i2790 = 0, i2791 = 0, i2792 = 0, i2793 = 0, i2794 = 0, i2795 = 0, i2796 = 0, i2797 = 0, i2798 = 0, i2799 = 0;
+        int i2800 = 0, i2801 = 0, i2802 = 0, i2803 = 0, i2804 = 0, i2805 = 0, i2806 = 0, i2807 = 0, i2808 = 0, i2809 = 0;
+        int i2810 = 0, i2811 = 0, i2812 = 0, i2813 = 0, i2814 = 0, i2815 = 0, i2816 = 0, i2817 = 0, i2818 = 0, i2819 = 0;
+        int i2820 = 0, i2821 = 0, i2822 = 0, i2823 = 0, i2824 = 0, i2825 = 0, i2826 = 0, i2827 = 0, i2828 = 0, i2829 = 0;
+        int i2830 = 0, i2831 = 0, i2832 = 0, i2833 = 0, i2834 = 0, i2835 = 0, i2836 = 0, i2837 = 0, i2838 = 0, i2839 = 0;
+        int i2840 = 0, i2841 = 0, i2842 = 0, i2843 = 0, i2844 = 0, i2845 = 0, i2846 = 0, i2847 = 0, i2848 = 0, i2849 = 0;
+        int i2850 = 0, i2851 = 0, i2852 = 0, i2853 = 0, i2854 = 0, i2855 = 0, i2856 = 0, i2857 = 0, i2858 = 0, i2859 = 0;
+        int i2860 = 0, i2861 = 0, i2862 = 0, i2863 = 0, i2864 = 0, i2865 = 0, i2866 = 0, i2867 = 0, i2868 = 0, i2869 = 0;
+        int i2870 = 0, i2871 = 0, i2872 = 0, i2873 = 0, i2874 = 0, i2875 = 0, i2876 = 0, i2877 = 0, i2878 = 0, i2879 = 0;
+        int i2880 = 0, i2881 = 0, i2882 = 0, i2883 = 0, i2884 = 0, i2885 = 0, i2886 = 0, i2887 = 0, i2888 = 0, i2889 = 0;
+        int i2890 = 0, i2891 = 0, i2892 = 0, i2893 = 0, i2894 = 0, i2895 = 0, i2896 = 0, i2897 = 0, i2898 = 0, i2899 = 0;
+        int i2900 = 0, i2901 = 0, i2902 = 0, i2903 = 0, i2904 = 0, i2905 = 0, i2906 = 0, i2907 = 0, i2908 = 0, i2909 = 0;
+        int i2910 = 0, i2911 = 0, i2912 = 0, i2913 = 0, i2914 = 0, i2915 = 0, i2916 = 0, i2917 = 0, i2918 = 0, i2919 = 0;
+        int i2920 = 0, i2921 = 0, i2922 = 0, i2923 = 0, i2924 = 0, i2925 = 0, i2926 = 0, i2927 = 0, i2928 = 0, i2929 = 0;
+        int i2930 = 0, i2931 = 0, i2932 = 0, i2933 = 0, i2934 = 0, i2935 = 0, i2936 = 0, i2937 = 0, i2938 = 0, i2939 = 0;
+        int i2940 = 0, i2941 = 0, i2942 = 0, i2943 = 0, i2944 = 0, i2945 = 0, i2946 = 0, i2947 = 0, i2948 = 0, i2949 = 0;
+        int i2950 = 0, i2951 = 0, i2952 = 0, i2953 = 0, i2954 = 0, i2955 = 0, i2956 = 0, i2957 = 0, i2958 = 0, i2959 = 0;
+        int i2960 = 0, i2961 = 0, i2962 = 0, i2963 = 0, i2964 = 0, i2965 = 0, i2966 = 0, i2967 = 0, i2968 = 0, i2969 = 0;
+        int i2970 = 0, i2971 = 0, i2972 = 0, i2973 = 0, i2974 = 0, i2975 = 0, i2976 = 0, i2977 = 0, i2978 = 0, i2979 = 0;
+        int i2980 = 0, i2981 = 0, i2982 = 0, i2983 = 0, i2984 = 0, i2985 = 0, i2986 = 0, i2987 = 0, i2988 = 0, i2989 = 0;
+        int i2990 = 0, i2991 = 0, i2992 = 0, i2993 = 0, i2994 = 0, i2995 = 0, i2996 = 0, i2997 = 0, i2998 = 0, i2999 = 0;
+        int i3000 = 0, i3001 = 0, i3002 = 0, i3003 = 0, i3004 = 0, i3005 = 0, i3006 = 0, i3007 = 0, i3008 = 0, i3009 = 0;
+        int i3010 = 0, i3011 = 0, i3012 = 0, i3013 = 0, i3014 = 0, i3015 = 0, i3016 = 0, i3017 = 0, i3018 = 0, i3019 = 0;
+        int i3020 = 0, i3021 = 0, i3022 = 0, i3023 = 0, i3024 = 0, i3025 = 0, i3026 = 0, i3027 = 0, i3028 = 0, i3029 = 0;
+        int i3030 = 0, i3031 = 0, i3032 = 0, i3033 = 0, i3034 = 0, i3035 = 0, i3036 = 0, i3037 = 0, i3038 = 0, i3039 = 0;
+        int i3040 = 0, i3041 = 0, i3042 = 0, i3043 = 0, i3044 = 0, i3045 = 0, i3046 = 0, i3047 = 0, i3048 = 0, i3049 = 0;
+        int i3050 = 0, i3051 = 0, i3052 = 0, i3053 = 0, i3054 = 0, i3055 = 0, i3056 = 0, i3057 = 0, i3058 = 0, i3059 = 0;
+        int i3060 = 0, i3061 = 0, i3062 = 0, i3063 = 0, i3064 = 0, i3065 = 0, i3066 = 0, i3067 = 0, i3068 = 0, i3069 = 0;
+        int i3070 = 0, i3071 = 0, i3072 = 0, i3073 = 0, i3074 = 0, i3075 = 0, i3076 = 0, i3077 = 0, i3078 = 0, i3079 = 0;
+        int i3080 = 0, i3081 = 0, i3082 = 0, i3083 = 0, i3084 = 0, i3085 = 0, i3086 = 0, i3087 = 0, i3088 = 0, i3089 = 0;
+        int i3090 = 0, i3091 = 0, i3092 = 0, i3093 = 0, i3094 = 0, i3095 = 0, i3096 = 0, i3097 = 0, i3098 = 0, i3099 = 0;
+        int i3100 = 0, i3101 = 0, i3102 = 0, i3103 = 0, i3104 = 0, i3105 = 0, i3106 = 0, i3107 = 0, i3108 = 0, i3109 = 0;
+        int i3110 = 0, i3111 = 0, i3112 = 0, i3113 = 0, i3114 = 0, i3115 = 0, i3116 = 0, i3117 = 0, i3118 = 0, i3119 = 0;
+        int i3120 = 0, i3121 = 0, i3122 = 0, i3123 = 0, i3124 = 0, i3125 = 0, i3126 = 0, i3127 = 0, i3128 = 0, i3129 = 0;
+        int i3130 = 0, i3131 = 0, i3132 = 0, i3133 = 0, i3134 = 0, i3135 = 0, i3136 = 0, i3137 = 0, i3138 = 0, i3139 = 0;
+        int i3140 = 0, i3141 = 0, i3142 = 0, i3143 = 0, i3144 = 0, i3145 = 0, i3146 = 0, i3147 = 0, i3148 = 0, i3149 = 0;
+        int i3150 = 0, i3151 = 0, i3152 = 0, i3153 = 0, i3154 = 0, i3155 = 0, i3156 = 0, i3157 = 0, i3158 = 0, i3159 = 0;
+        int i3160 = 0, i3161 = 0, i3162 = 0, i3163 = 0, i3164 = 0, i3165 = 0, i3166 = 0, i3167 = 0, i3168 = 0, i3169 = 0;
+        int i3170 = 0, i3171 = 0, i3172 = 0, i3173 = 0, i3174 = 0, i3175 = 0, i3176 = 0, i3177 = 0, i3178 = 0, i3179 = 0;
+        int i3180 = 0, i3181 = 0, i3182 = 0, i3183 = 0, i3184 = 0, i3185 = 0, i3186 = 0, i3187 = 0, i3188 = 0, i3189 = 0;
+        int i3190 = 0, i3191 = 0, i3192 = 0, i3193 = 0, i3194 = 0, i3195 = 0, i3196 = 0, i3197 = 0, i3198 = 0, i3199 = 0;
+        int i3200 = 0, i3201 = 0, i3202 = 0, i3203 = 0, i3204 = 0, i3205 = 0, i3206 = 0, i3207 = 0, i3208 = 0, i3209 = 0;
+        int i3210 = 0, i3211 = 0, i3212 = 0, i3213 = 0, i3214 = 0, i3215 = 0, i3216 = 0, i3217 = 0, i3218 = 0, i3219 = 0;
+        int i3220 = 0, i3221 = 0, i3222 = 0, i3223 = 0, i3224 = 0, i3225 = 0, i3226 = 0, i3227 = 0, i3228 = 0, i3229 = 0;
+        int i3230 = 0, i3231 = 0, i3232 = 0, i3233 = 0, i3234 = 0, i3235 = 0, i3236 = 0, i3237 = 0, i3238 = 0, i3239 = 0;
+        int i3240 = 0, i3241 = 0, i3242 = 0, i3243 = 0, i3244 = 0, i3245 = 0, i3246 = 0, i3247 = 0, i3248 = 0, i3249 = 0;
+        int i3250 = 0, i3251 = 0, i3252 = 0, i3253 = 0, i3254 = 0, i3255 = 0, i3256 = 0, i3257 = 0, i3258 = 0, i3259 = 0;
+        int i3260 = 0, i3261 = 0, i3262 = 0, i3263 = 0, i3264 = 0, i3265 = 0, i3266 = 0, i3267 = 0, i3268 = 0, i3269 = 0;
+        int i3270 = 0, i3271 = 0, i3272 = 0, i3273 = 0, i3274 = 0, i3275 = 0, i3276 = 0, i3277 = 0, i3278 = 0, i3279 = 0;
+        int i3280 = 0, i3281 = 0, i3282 = 0, i3283 = 0, i3284 = 0, i3285 = 0, i3286 = 0, i3287 = 0, i3288 = 0, i3289 = 0;
+        int i3290 = 0, i3291 = 0, i3292 = 0, i3293 = 0, i3294 = 0, i3295 = 0, i3296 = 0, i3297 = 0, i3298 = 0, i3299 = 0;
+        int i3300 = 0, i3301 = 0, i3302 = 0, i3303 = 0, i3304 = 0, i3305 = 0, i3306 = 0, i3307 = 0, i3308 = 0, i3309 = 0;
+        int i3310 = 0, i3311 = 0, i3312 = 0, i3313 = 0, i3314 = 0, i3315 = 0, i3316 = 0, i3317 = 0, i3318 = 0, i3319 = 0;
+        int i3320 = 0, i3321 = 0, i3322 = 0, i3323 = 0, i3324 = 0, i3325 = 0, i3326 = 0, i3327 = 0, i3328 = 0, i3329 = 0;
+        int i3330 = 0, i3331 = 0, i3332 = 0, i3333 = 0, i3334 = 0, i3335 = 0, i3336 = 0, i3337 = 0, i3338 = 0, i3339 = 0;
+        int i3340 = 0, i3341 = 0, i3342 = 0, i3343 = 0, i3344 = 0, i3345 = 0, i3346 = 0, i3347 = 0, i3348 = 0, i3349 = 0;
+        int i3350 = 0, i3351 = 0, i3352 = 0, i3353 = 0, i3354 = 0, i3355 = 0, i3356 = 0, i3357 = 0, i3358 = 0, i3359 = 0;
+        int i3360 = 0, i3361 = 0, i3362 = 0, i3363 = 0, i3364 = 0, i3365 = 0, i3366 = 0, i3367 = 0, i3368 = 0, i3369 = 0;
+        int i3370 = 0, i3371 = 0, i3372 = 0, i3373 = 0, i3374 = 0, i3375 = 0, i3376 = 0, i3377 = 0, i3378 = 0, i3379 = 0;
+        int i3380 = 0, i3381 = 0, i3382 = 0, i3383 = 0, i3384 = 0, i3385 = 0, i3386 = 0, i3387 = 0, i3388 = 0, i3389 = 0;
+        int i3390 = 0, i3391 = 0, i3392 = 0, i3393 = 0, i3394 = 0, i3395 = 0, i3396 = 0, i3397 = 0, i3398 = 0, i3399 = 0;
+        int i3400 = 0, i3401 = 0, i3402 = 0, i3403 = 0, i3404 = 0, i3405 = 0, i3406 = 0, i3407 = 0, i3408 = 0, i3409 = 0;
+        int i3410 = 0, i3411 = 0, i3412 = 0, i3413 = 0, i3414 = 0, i3415 = 0, i3416 = 0, i3417 = 0, i3418 = 0, i3419 = 0;
+        int i3420 = 0, i3421 = 0, i3422 = 0, i3423 = 0, i3424 = 0, i3425 = 0, i3426 = 0, i3427 = 0, i3428 = 0, i3429 = 0;
+        int i3430 = 0, i3431 = 0, i3432 = 0, i3433 = 0, i3434 = 0, i3435 = 0, i3436 = 0, i3437 = 0, i3438 = 0, i3439 = 0;
+        int i3440 = 0, i3441 = 0, i3442 = 0, i3443 = 0, i3444 = 0, i3445 = 0, i3446 = 0, i3447 = 0, i3448 = 0, i3449 = 0;
+        int i3450 = 0, i3451 = 0, i3452 = 0, i3453 = 0, i3454 = 0, i3455 = 0, i3456 = 0, i3457 = 0, i3458 = 0, i3459 = 0;
+        int i3460 = 0, i3461 = 0, i3462 = 0, i3463 = 0, i3464 = 0, i3465 = 0, i3466 = 0, i3467 = 0, i3468 = 0, i3469 = 0;
+        int i3470 = 0, i3471 = 0, i3472 = 0, i3473 = 0, i3474 = 0, i3475 = 0, i3476 = 0, i3477 = 0, i3478 = 0, i3479 = 0;
+        int i3480 = 0, i3481 = 0, i3482 = 0, i3483 = 0, i3484 = 0, i3485 = 0, i3486 = 0, i3487 = 0, i3488 = 0, i3489 = 0;
+        int i3490 = 0, i3491 = 0, i3492 = 0, i3493 = 0, i3494 = 0, i3495 = 0, i3496 = 0, i3497 = 0, i3498 = 0, i3499 = 0;
+        int i3500 = 0, i3501 = 0, i3502 = 0, i3503 = 0, i3504 = 0, i3505 = 0, i3506 = 0, i3507 = 0, i3508 = 0, i3509 = 0;
+        int i3510 = 0, i3511 = 0, i3512 = 0, i3513 = 0, i3514 = 0, i3515 = 0, i3516 = 0, i3517 = 0, i3518 = 0, i3519 = 0;
+        int i3520 = 0, i3521 = 0, i3522 = 0, i3523 = 0, i3524 = 0, i3525 = 0, i3526 = 0, i3527 = 0, i3528 = 0, i3529 = 0;
+        int i3530 = 0, i3531 = 0, i3532 = 0, i3533 = 0, i3534 = 0, i3535 = 0, i3536 = 0, i3537 = 0, i3538 = 0, i3539 = 0;
+        int i3540 = 0, i3541 = 0, i3542 = 0, i3543 = 0, i3544 = 0, i3545 = 0, i3546 = 0, i3547 = 0, i3548 = 0, i3549 = 0;
+        int i3550 = 0, i3551 = 0, i3552 = 0, i3553 = 0, i3554 = 0, i3555 = 0, i3556 = 0, i3557 = 0, i3558 = 0, i3559 = 0;
+        int i3560 = 0, i3561 = 0, i3562 = 0, i3563 = 0, i3564 = 0, i3565 = 0, i3566 = 0, i3567 = 0, i3568 = 0, i3569 = 0;
+        int i3570 = 0, i3571 = 0, i3572 = 0, i3573 = 0, i3574 = 0, i3575 = 0, i3576 = 0, i3577 = 0, i3578 = 0, i3579 = 0;
+        int i3580 = 0, i3581 = 0, i3582 = 0, i3583 = 0, i3584 = 0, i3585 = 0, i3586 = 0, i3587 = 0, i3588 = 0, i3589 = 0;
+        int i3590 = 0, i3591 = 0, i3592 = 0, i3593 = 0, i3594 = 0, i3595 = 0, i3596 = 0, i3597 = 0, i3598 = 0, i3599 = 0;
+        int i3600 = 0, i3601 = 0, i3602 = 0, i3603 = 0, i3604 = 0, i3605 = 0, i3606 = 0, i3607 = 0, i3608 = 0, i3609 = 0;
+        int i3610 = 0, i3611 = 0, i3612 = 0, i3613 = 0, i3614 = 0, i3615 = 0, i3616 = 0, i3617 = 0, i3618 = 0, i3619 = 0;
+        int i3620 = 0, i3621 = 0, i3622 = 0, i3623 = 0, i3624 = 0, i3625 = 0, i3626 = 0, i3627 = 0, i3628 = 0, i3629 = 0;
+        int i3630 = 0, i3631 = 0, i3632 = 0, i3633 = 0, i3634 = 0, i3635 = 0, i3636 = 0, i3637 = 0, i3638 = 0, i3639 = 0;
+        int i3640 = 0, i3641 = 0, i3642 = 0, i3643 = 0, i3644 = 0, i3645 = 0, i3646 = 0, i3647 = 0, i3648 = 0, i3649 = 0;
+        int i3650 = 0, i3651 = 0, i3652 = 0, i3653 = 0, i3654 = 0, i3655 = 0, i3656 = 0, i3657 = 0, i3658 = 0, i3659 = 0;
+        int i3660 = 0, i3661 = 0, i3662 = 0, i3663 = 0, i3664 = 0, i3665 = 0, i3666 = 0, i3667 = 0, i3668 = 0, i3669 = 0;
+        int i3670 = 0, i3671 = 0, i3672 = 0, i3673 = 0, i3674 = 0, i3675 = 0, i3676 = 0, i3677 = 0, i3678 = 0, i3679 = 0;
+        int i3680 = 0, i3681 = 0, i3682 = 0, i3683 = 0, i3684 = 0, i3685 = 0, i3686 = 0, i3687 = 0, i3688 = 0, i3689 = 0;
+        int i3690 = 0, i3691 = 0, i3692 = 0, i3693 = 0, i3694 = 0, i3695 = 0, i3696 = 0, i3697 = 0, i3698 = 0, i3699 = 0;
+        int i3700 = 0, i3701 = 0, i3702 = 0, i3703 = 0, i3704 = 0, i3705 = 0, i3706 = 0, i3707 = 0, i3708 = 0, i3709 = 0;
+        int i3710 = 0, i3711 = 0, i3712 = 0, i3713 = 0, i3714 = 0, i3715 = 0, i3716 = 0, i3717 = 0, i3718 = 0, i3719 = 0;
+        int i3720 = 0, i3721 = 0, i3722 = 0, i3723 = 0, i3724 = 0, i3725 = 0, i3726 = 0, i3727 = 0, i3728 = 0, i3729 = 0;
+        int i3730 = 0, i3731 = 0, i3732 = 0, i3733 = 0, i3734 = 0, i3735 = 0, i3736 = 0, i3737 = 0, i3738 = 0, i3739 = 0;
+        int i3740 = 0, i3741 = 0, i3742 = 0, i3743 = 0, i3744 = 0, i3745 = 0, i3746 = 0, i3747 = 0, i3748 = 0, i3749 = 0;
+        int i3750 = 0, i3751 = 0, i3752 = 0, i3753 = 0, i3754 = 0, i3755 = 0, i3756 = 0, i3757 = 0, i3758 = 0, i3759 = 0;
+        int i3760 = 0, i3761 = 0, i3762 = 0, i3763 = 0, i3764 = 0, i3765 = 0, i3766 = 0, i3767 = 0, i3768 = 0, i3769 = 0;
+        int i3770 = 0, i3771 = 0, i3772 = 0, i3773 = 0, i3774 = 0, i3775 = 0, i3776 = 0, i3777 = 0, i3778 = 0, i3779 = 0;
+        int i3780 = 0, i3781 = 0, i3782 = 0, i3783 = 0, i3784 = 0, i3785 = 0, i3786 = 0, i3787 = 0, i3788 = 0, i3789 = 0;
+        int i3790 = 0, i3791 = 0, i3792 = 0, i3793 = 0, i3794 = 0, i3795 = 0, i3796 = 0, i3797 = 0, i3798 = 0, i3799 = 0;
+        int i3800 = 0, i3801 = 0, i3802 = 0, i3803 = 0, i3804 = 0, i3805 = 0, i3806 = 0, i3807 = 0, i3808 = 0, i3809 = 0;
+        int i3810 = 0, i3811 = 0, i3812 = 0, i3813 = 0, i3814 = 0, i3815 = 0, i3816 = 0, i3817 = 0, i3818 = 0, i3819 = 0;
+        int i3820 = 0, i3821 = 0, i3822 = 0, i3823 = 0, i3824 = 0, i3825 = 0, i3826 = 0, i3827 = 0, i3828 = 0, i3829 = 0;
+        int i3830 = 0, i3831 = 0, i3832 = 0, i3833 = 0, i3834 = 0, i3835 = 0, i3836 = 0, i3837 = 0, i3838 = 0, i3839 = 0;
+        int i3840 = 0, i3841 = 0, i3842 = 0, i3843 = 0, i3844 = 0, i3845 = 0, i3846 = 0, i3847 = 0, i3848 = 0, i3849 = 0;
+        int i3850 = 0, i3851 = 0, i3852 = 0, i3853 = 0, i3854 = 0, i3855 = 0, i3856 = 0, i3857 = 0, i3858 = 0, i3859 = 0;
+        int i3860 = 0, i3861 = 0, i3862 = 0, i3863 = 0, i3864 = 0, i3865 = 0, i3866 = 0, i3867 = 0, i3868 = 0, i3869 = 0;
+        int i3870 = 0, i3871 = 0, i3872 = 0, i3873 = 0, i3874 = 0, i3875 = 0, i3876 = 0, i3877 = 0, i3878 = 0, i3879 = 0;
+        int i3880 = 0, i3881 = 0, i3882 = 0, i3883 = 0, i3884 = 0, i3885 = 0, i3886 = 0, i3887 = 0, i3888 = 0, i3889 = 0;
+        int i3890 = 0, i3891 = 0, i3892 = 0, i3893 = 0, i3894 = 0, i3895 = 0, i3896 = 0, i3897 = 0, i3898 = 0, i3899 = 0;
+        int i3900 = 0, i3901 = 0, i3902 = 0, i3903 = 0, i3904 = 0, i3905 = 0, i3906 = 0, i3907 = 0, i3908 = 0, i3909 = 0;
+        int i3910 = 0, i3911 = 0, i3912 = 0, i3913 = 0, i3914 = 0, i3915 = 0, i3916 = 0, i3917 = 0, i3918 = 0, i3919 = 0;
+        int i3920 = 0, i3921 = 0, i3922 = 0, i3923 = 0, i3924 = 0, i3925 = 0, i3926 = 0, i3927 = 0, i3928 = 0, i3929 = 0;
+        int i3930 = 0, i3931 = 0, i3932 = 0, i3933 = 0, i3934 = 0, i3935 = 0, i3936 = 0, i3937 = 0, i3938 = 0, i3939 = 0;
+        int i3940 = 0, i3941 = 0, i3942 = 0, i3943 = 0, i3944 = 0, i3945 = 0, i3946 = 0, i3947 = 0, i3948 = 0, i3949 = 0;
+        int i3950 = 0, i3951 = 0, i3952 = 0, i3953 = 0, i3954 = 0, i3955 = 0, i3956 = 0, i3957 = 0, i3958 = 0, i3959 = 0;
+        int i3960 = 0, i3961 = 0, i3962 = 0, i3963 = 0, i3964 = 0, i3965 = 0, i3966 = 0, i3967 = 0, i3968 = 0, i3969 = 0;
+        int i3970 = 0, i3971 = 0, i3972 = 0, i3973 = 0, i3974 = 0, i3975 = 0, i3976 = 0, i3977 = 0, i3978 = 0, i3979 = 0;
+        int i3980 = 0, i3981 = 0, i3982 = 0, i3983 = 0, i3984 = 0, i3985 = 0, i3986 = 0, i3987 = 0, i3988 = 0, i3989 = 0;
+        int i3990 = 0, i3991 = 0, i3992 = 0, i3993 = 0, i3994 = 0, i3995 = 0, i3996 = 0, i3997 = 0, i3998 = 0, i3999 = 0;
+        int i4000 = 0, i4001 = 0, i4002 = 0, i4003 = 0, i4004 = 0, i4005 = 0, i4006 = 0, i4007 = 0, i4008 = 0, i4009 = 0;
+        int i4010 = 0, i4011 = 0, i4012 = 0, i4013 = 0, i4014 = 0, i4015 = 0, i4016 = 0, i4017 = 0, i4018 = 0, i4019 = 0;
+        int i4020 = 0, i4021 = 0, i4022 = 0, i4023 = 0, i4024 = 0, i4025 = 0, i4026 = 0, i4027 = 0, i4028 = 0, i4029 = 0;
+        int i4030 = 0, i4031 = 0, i4032 = 0, i4033 = 0, i4034 = 0, i4035 = 0, i4036 = 0, i4037 = 0, i4038 = 0, i4039 = 0;
+        int i4040 = 0, i4041 = 0, i4042 = 0, i4043 = 0, i4044 = 0, i4045 = 0, i4046 = 0, i4047 = 0, i4048 = 0, i4049 = 0;
+        int i4050 = 0, i4051 = 0, i4052 = 0, i4053 = 0, i4054 = 0, i4055 = 0, i4056 = 0, i4057 = 0, i4058 = 0, i4059 = 0;
+        int i4060 = 0, i4061 = 0, i4062 = 0, i4063 = 0, i4064 = 0, i4065 = 0, i4066 = 0, i4067 = 0, i4068 = 0, i4069 = 0;
+        int i4070 = 0, i4071 = 0, i4072 = 0, i4073 = 0, i4074 = 0, i4075 = 0, i4076 = 0, i4077 = 0, i4078 = 0, i4079 = 0;
+        int i4080 = 0, i4081 = 0, i4082 = 0, i4083 = 0, i4084 = 0, i4085 = 0, i4086 = 0, i4087 = 0, i4088 = 0, i4089 = 0;
+        int i4090 = 0, i4091 = 0, i4092 = 0, i4093 = 0, i4094 = 0, i4095 = 0, i4096 = 0, i4097 = 0, i4098 = 0, i4099 = 0;
+        int i4100 = 0, i4101 = 0, i4102 = 0, i4103 = 0, i4104 = 0, i4105 = 0, i4106 = 0, i4107 = 0, i4108 = 0, i4109 = 0;
+        int i4110 = 0, i4111 = 0, i4112 = 0, i4113 = 0, i4114 = 0, i4115 = 0, i4116 = 0, i4117 = 0, i4118 = 0, i4119 = 0;
+        int i4120 = 0, i4121 = 0, i4122 = 0, i4123 = 0, i4124 = 0, i4125 = 0, i4126 = 0, i4127 = 0, i4128 = 0, i4129 = 0;
+        int i4130 = 0, i4131 = 0, i4132 = 0, i4133 = 0, i4134 = 0, i4135 = 0, i4136 = 0, i4137 = 0, i4138 = 0, i4139 = 0;
+        int i4140 = 0, i4141 = 0, i4142 = 0, i4143 = 0, i4144 = 0, i4145 = 0, i4146 = 0, i4147 = 0, i4148 = 0, i4149 = 0;
+        int i4150 = 0, i4151 = 0, i4152 = 0, i4153 = 0, i4154 = 0, i4155 = 0, i4156 = 0, i4157 = 0, i4158 = 0, i4159 = 0;
+        int i4160 = 0, i4161 = 0, i4162 = 0, i4163 = 0, i4164 = 0, i4165 = 0, i4166 = 0, i4167 = 0, i4168 = 0, i4169 = 0;
+        int i4170 = 0, i4171 = 0, i4172 = 0, i4173 = 0, i4174 = 0, i4175 = 0, i4176 = 0, i4177 = 0, i4178 = 0, i4179 = 0;
+        int i4180 = 0, i4181 = 0, i4182 = 0, i4183 = 0, i4184 = 0, i4185 = 0, i4186 = 0, i4187 = 0, i4188 = 0, i4189 = 0;
+        int i4190 = 0, i4191 = 0, i4192 = 0, i4193 = 0, i4194 = 0, i4195 = 0, i4196 = 0, i4197 = 0, i4198 = 0, i4199 = 0;
+        int i4200 = 0, i4201 = 0, i4202 = 0, i4203 = 0, i4204 = 0, i4205 = 0, i4206 = 0, i4207 = 0, i4208 = 0, i4209 = 0;
+        int i4210 = 0, i4211 = 0, i4212 = 0, i4213 = 0, i4214 = 0, i4215 = 0, i4216 = 0, i4217 = 0, i4218 = 0, i4219 = 0;
+        int i4220 = 0, i4221 = 0, i4222 = 0, i4223 = 0, i4224 = 0, i4225 = 0, i4226 = 0, i4227 = 0, i4228 = 0, i4229 = 0;
+        int i4230 = 0, i4231 = 0, i4232 = 0, i4233 = 0, i4234 = 0, i4235 = 0, i4236 = 0, i4237 = 0, i4238 = 0, i4239 = 0;
+        int i4240 = 0, i4241 = 0, i4242 = 0, i4243 = 0, i4244 = 0, i4245 = 0, i4246 = 0, i4247 = 0, i4248 = 0, i4249 = 0;
+        int i4250 = 0, i4251 = 0, i4252 = 0, i4253 = 0, i4254 = 0, i4255 = 0, i4256 = 0, i4257 = 0, i4258 = 0, i4259 = 0;
+        int i4260 = 0, i4261 = 0, i4262 = 0, i4263 = 0, i4264 = 0, i4265 = 0, i4266 = 0, i4267 = 0, i4268 = 0, i4269 = 0;
+        int i4270 = 0, i4271 = 0, i4272 = 0, i4273 = 0, i4274 = 0, i4275 = 0, i4276 = 0, i4277 = 0, i4278 = 0, i4279 = 0;
+        int i4280 = 0, i4281 = 0, i4282 = 0, i4283 = 0, i4284 = 0, i4285 = 0, i4286 = 0, i4287 = 0, i4288 = 0, i4289 = 0;
+        int i4290 = 0, i4291 = 0, i4292 = 0, i4293 = 0, i4294 = 0, i4295 = 0, i4296 = 0, i4297 = 0, i4298 = 0, i4299 = 0;
+        int i4300 = 0, i4301 = 0, i4302 = 0, i4303 = 0, i4304 = 0, i4305 = 0, i4306 = 0, i4307 = 0, i4308 = 0, i4309 = 0;
+        int i4310 = 0, i4311 = 0, i4312 = 0, i4313 = 0, i4314 = 0, i4315 = 0, i4316 = 0, i4317 = 0, i4318 = 0, i4319 = 0;
+        int i4320 = 0, i4321 = 0, i4322 = 0, i4323 = 0, i4324 = 0, i4325 = 0, i4326 = 0, i4327 = 0, i4328 = 0, i4329 = 0;
+        int i4330 = 0, i4331 = 0, i4332 = 0, i4333 = 0, i4334 = 0, i4335 = 0, i4336 = 0, i4337 = 0, i4338 = 0, i4339 = 0;
+        int i4340 = 0, i4341 = 0, i4342 = 0, i4343 = 0, i4344 = 0, i4345 = 0, i4346 = 0, i4347 = 0, i4348 = 0, i4349 = 0;
+        int i4350 = 0, i4351 = 0, i4352 = 0, i4353 = 0, i4354 = 0, i4355 = 0, i4356 = 0, i4357 = 0, i4358 = 0, i4359 = 0;
+        int i4360 = 0, i4361 = 0, i4362 = 0, i4363 = 0, i4364 = 0, i4365 = 0, i4366 = 0, i4367 = 0, i4368 = 0, i4369 = 0;
+        int i4370 = 0, i4371 = 0, i4372 = 0, i4373 = 0, i4374 = 0, i4375 = 0, i4376 = 0, i4377 = 0, i4378 = 0, i4379 = 0;
+        int i4380 = 0, i4381 = 0, i4382 = 0, i4383 = 0, i4384 = 0, i4385 = 0, i4386 = 0, i4387 = 0, i4388 = 0, i4389 = 0;
+        int i4390 = 0, i4391 = 0, i4392 = 0, i4393 = 0, i4394 = 0, i4395 = 0, i4396 = 0, i4397 = 0, i4398 = 0, i4399 = 0;
+        int i4400 = 0, i4401 = 0, i4402 = 0, i4403 = 0, i4404 = 0, i4405 = 0, i4406 = 0, i4407 = 0, i4408 = 0, i4409 = 0;
+        int i4410 = 0, i4411 = 0, i4412 = 0, i4413 = 0, i4414 = 0, i4415 = 0, i4416 = 0, i4417 = 0, i4418 = 0, i4419 = 0;
+        int i4420 = 0, i4421 = 0, i4422 = 0, i4423 = 0, i4424 = 0, i4425 = 0, i4426 = 0, i4427 = 0, i4428 = 0, i4429 = 0;
+        int i4430 = 0, i4431 = 0, i4432 = 0, i4433 = 0, i4434 = 0, i4435 = 0, i4436 = 0, i4437 = 0, i4438 = 0, i4439 = 0;
+        int i4440 = 0, i4441 = 0, i4442 = 0, i4443 = 0, i4444 = 0, i4445 = 0, i4446 = 0, i4447 = 0, i4448 = 0, i4449 = 0;
+        int i4450 = 0, i4451 = 0, i4452 = 0, i4453 = 0, i4454 = 0, i4455 = 0, i4456 = 0, i4457 = 0, i4458 = 0, i4459 = 0;
+        int i4460 = 0, i4461 = 0, i4462 = 0, i4463 = 0, i4464 = 0, i4465 = 0, i4466 = 0, i4467 = 0, i4468 = 0, i4469 = 0;
+        int i4470 = 0, i4471 = 0, i4472 = 0, i4473 = 0, i4474 = 0, i4475 = 0, i4476 = 0, i4477 = 0, i4478 = 0, i4479 = 0;
+        int i4480 = 0, i4481 = 0, i4482 = 0, i4483 = 0, i4484 = 0, i4485 = 0, i4486 = 0, i4487 = 0, i4488 = 0, i4489 = 0;
+        int i4490 = 0, i4491 = 0, i4492 = 0, i4493 = 0, i4494 = 0, i4495 = 0, i4496 = 0, i4497 = 0, i4498 = 0, i4499 = 0;
+        int i4500 = 0, i4501 = 0, i4502 = 0, i4503 = 0, i4504 = 0, i4505 = 0, i4506 = 0, i4507 = 0, i4508 = 0, i4509 = 0;
+        int i4510 = 0, i4511 = 0, i4512 = 0, i4513 = 0, i4514 = 0, i4515 = 0, i4516 = 0, i4517 = 0, i4518 = 0, i4519 = 0;
+        int i4520 = 0, i4521 = 0, i4522 = 0, i4523 = 0, i4524 = 0, i4525 = 0, i4526 = 0, i4527 = 0, i4528 = 0, i4529 = 0;
+        int i4530 = 0, i4531 = 0, i4532 = 0, i4533 = 0, i4534 = 0, i4535 = 0, i4536 = 0, i4537 = 0, i4538 = 0, i4539 = 0;
+        int i4540 = 0, i4541 = 0, i4542 = 0, i4543 = 0, i4544 = 0, i4545 = 0, i4546 = 0, i4547 = 0, i4548 = 0, i4549 = 0;
+        int i4550 = 0, i4551 = 0, i4552 = 0, i4553 = 0, i4554 = 0, i4555 = 0, i4556 = 0, i4557 = 0, i4558 = 0, i4559 = 0;
+        int i4560 = 0, i4561 = 0, i4562 = 0, i4563 = 0, i4564 = 0, i4565 = 0, i4566 = 0, i4567 = 0, i4568 = 0, i4569 = 0;
+        int i4570 = 0, i4571 = 0, i4572 = 0, i4573 = 0, i4574 = 0, i4575 = 0, i4576 = 0, i4577 = 0, i4578 = 0, i4579 = 0;
+        int i4580 = 0, i4581 = 0, i4582 = 0, i4583 = 0, i4584 = 0, i4585 = 0, i4586 = 0, i4587 = 0, i4588 = 0, i4589 = 0;
+        int i4590 = 0, i4591 = 0, i4592 = 0, i4593 = 0, i4594 = 0, i4595 = 0, i4596 = 0, i4597 = 0, i4598 = 0, i4599 = 0;
+        int i4600 = 0, i4601 = 0, i4602 = 0, i4603 = 0, i4604 = 0, i4605 = 0, i4606 = 0, i4607 = 0, i4608 = 0, i4609 = 0;
+        int i4610 = 0, i4611 = 0, i4612 = 0, i4613 = 0, i4614 = 0, i4615 = 0, i4616 = 0, i4617 = 0, i4618 = 0, i4619 = 0;
+        int i4620 = 0, i4621 = 0, i4622 = 0, i4623 = 0, i4624 = 0, i4625 = 0, i4626 = 0, i4627 = 0, i4628 = 0, i4629 = 0;
+        int i4630 = 0, i4631 = 0, i4632 = 0, i4633 = 0, i4634 = 0, i4635 = 0, i4636 = 0, i4637 = 0, i4638 = 0, i4639 = 0;
+        int i4640 = 0, i4641 = 0, i4642 = 0, i4643 = 0, i4644 = 0, i4645 = 0, i4646 = 0, i4647 = 0, i4648 = 0, i4649 = 0;
+        int i4650 = 0, i4651 = 0, i4652 = 0, i4653 = 0, i4654 = 0, i4655 = 0, i4656 = 0, i4657 = 0, i4658 = 0, i4659 = 0;
+        int i4660 = 0, i4661 = 0, i4662 = 0, i4663 = 0, i4664 = 0, i4665 = 0, i4666 = 0, i4667 = 0, i4668 = 0, i4669 = 0;
+        int i4670 = 0, i4671 = 0, i4672 = 0, i4673 = 0, i4674 = 0, i4675 = 0, i4676 = 0, i4677 = 0, i4678 = 0, i4679 = 0;
+        int i4680 = 0, i4681 = 0, i4682 = 0, i4683 = 0, i4684 = 0, i4685 = 0, i4686 = 0, i4687 = 0, i4688 = 0, i4689 = 0;
+        int i4690 = 0, i4691 = 0, i4692 = 0, i4693 = 0, i4694 = 0, i4695 = 0, i4696 = 0, i4697 = 0, i4698 = 0, i4699 = 0;
+        int i4700 = 0, i4701 = 0, i4702 = 0, i4703 = 0, i4704 = 0, i4705 = 0, i4706 = 0, i4707 = 0, i4708 = 0, i4709 = 0;
+        int i4710 = 0, i4711 = 0, i4712 = 0, i4713 = 0, i4714 = 0, i4715 = 0, i4716 = 0, i4717 = 0, i4718 = 0, i4719 = 0;
+        int i4720 = 0, i4721 = 0, i4722 = 0, i4723 = 0, i4724 = 0, i4725 = 0, i4726 = 0, i4727 = 0, i4728 = 0, i4729 = 0;
+        int i4730 = 0, i4731 = 0, i4732 = 0, i4733 = 0, i4734 = 0, i4735 = 0, i4736 = 0, i4737 = 0, i4738 = 0, i4739 = 0;
+        int i4740 = 0, i4741 = 0, i4742 = 0, i4743 = 0, i4744 = 0, i4745 = 0, i4746 = 0, i4747 = 0, i4748 = 0, i4749 = 0;
+        int i4750 = 0, i4751 = 0, i4752 = 0, i4753 = 0, i4754 = 0, i4755 = 0, i4756 = 0, i4757 = 0, i4758 = 0, i4759 = 0;
+        int i4760 = 0, i4761 = 0, i4762 = 0, i4763 = 0, i4764 = 0, i4765 = 0, i4766 = 0, i4767 = 0, i4768 = 0, i4769 = 0;
+        int i4770 = 0, i4771 = 0, i4772 = 0, i4773 = 0, i4774 = 0, i4775 = 0, i4776 = 0, i4777 = 0, i4778 = 0, i4779 = 0;
+        int i4780 = 0, i4781 = 0, i4782 = 0, i4783 = 0, i4784 = 0, i4785 = 0, i4786 = 0, i4787 = 0, i4788 = 0, i4789 = 0;
+        int i4790 = 0, i4791 = 0, i4792 = 0, i4793 = 0, i4794 = 0, i4795 = 0, i4796 = 0, i4797 = 0, i4798 = 0, i4799 = 0;
+        int i4800 = 0, i4801 = 0, i4802 = 0, i4803 = 0, i4804 = 0, i4805 = 0, i4806 = 0, i4807 = 0, i4808 = 0, i4809 = 0;
+        int i4810 = 0, i4811 = 0, i4812 = 0, i4813 = 0, i4814 = 0, i4815 = 0, i4816 = 0, i4817 = 0, i4818 = 0, i4819 = 0;
+        int i4820 = 0, i4821 = 0, i4822 = 0, i4823 = 0, i4824 = 0, i4825 = 0, i4826 = 0, i4827 = 0, i4828 = 0, i4829 = 0;
+        int i4830 = 0, i4831 = 0, i4832 = 0, i4833 = 0, i4834 = 0, i4835 = 0, i4836 = 0, i4837 = 0, i4838 = 0, i4839 = 0;
+        int i4840 = 0, i4841 = 0, i4842 = 0, i4843 = 0, i4844 = 0, i4845 = 0, i4846 = 0, i4847 = 0, i4848 = 0, i4849 = 0;
+        int i4850 = 0, i4851 = 0, i4852 = 0, i4853 = 0, i4854 = 0, i4855 = 0, i4856 = 0, i4857 = 0, i4858 = 0, i4859 = 0;
+        int i4860 = 0, i4861 = 0, i4862 = 0, i4863 = 0, i4864 = 0, i4865 = 0, i4866 = 0, i4867 = 0, i4868 = 0, i4869 = 0;
+        int i4870 = 0, i4871 = 0, i4872 = 0, i4873 = 0, i4874 = 0, i4875 = 0, i4876 = 0, i4877 = 0, i4878 = 0, i4879 = 0;
+        int i4880 = 0, i4881 = 0, i4882 = 0, i4883 = 0, i4884 = 0, i4885 = 0, i4886 = 0, i4887 = 0, i4888 = 0, i4889 = 0;
+        int i4890 = 0, i4891 = 0, i4892 = 0, i4893 = 0, i4894 = 0, i4895 = 0, i4896 = 0, i4897 = 0, i4898 = 0, i4899 = 0;
+        int i4900 = 0, i4901 = 0, i4902 = 0, i4903 = 0, i4904 = 0, i4905 = 0, i4906 = 0, i4907 = 0, i4908 = 0, i4909 = 0;
+        int i4910 = 0, i4911 = 0, i4912 = 0, i4913 = 0, i4914 = 0, i4915 = 0, i4916 = 0, i4917 = 0, i4918 = 0, i4919 = 0;
+        int i4920 = 0, i4921 = 0, i4922 = 0, i4923 = 0, i4924 = 0, i4925 = 0, i4926 = 0, i4927 = 0, i4928 = 0, i4929 = 0;
+        int i4930 = 0, i4931 = 0, i4932 = 0, i4933 = 0, i4934 = 0, i4935 = 0, i4936 = 0, i4937 = 0, i4938 = 0, i4939 = 0;
+        int i4940 = 0, i4941 = 0, i4942 = 0, i4943 = 0, i4944 = 0, i4945 = 0, i4946 = 0, i4947 = 0, i4948 = 0, i4949 = 0;
+        int i4950 = 0, i4951 = 0, i4952 = 0, i4953 = 0, i4954 = 0, i4955 = 0, i4956 = 0, i4957 = 0, i4958 = 0, i4959 = 0;
+        int i4960 = 0, i4961 = 0, i4962 = 0, i4963 = 0, i4964 = 0, i4965 = 0, i4966 = 0, i4967 = 0, i4968 = 0, i4969 = 0;
+        int i4970 = 0, i4971 = 0, i4972 = 0, i4973 = 0, i4974 = 0, i4975 = 0, i4976 = 0, i4977 = 0, i4978 = 0, i4979 = 0;
+        int i4980 = 0, i4981 = 0, i4982 = 0, i4983 = 0, i4984 = 0, i4985 = 0, i4986 = 0, i4987 = 0, i4988 = 0, i4989 = 0;
+        int i4990 = 0, i4991 = 0, i4992 = 0, i4993 = 0, i4994 = 0, i4995 = 0, i4996 = 0, i4997 = 0, i4998 = 0, i4999 = 0;
+        int i5000 = 0, i5001 = 0, i5002 = 0, i5003 = 0, i5004 = 0, i5005 = 0, i5006 = 0, i5007 = 0, i5008 = 0, i5009 = 0;
+        int i5010 = 0, i5011 = 0, i5012 = 0, i5013 = 0, i5014 = 0, i5015 = 0, i5016 = 0, i5017 = 0, i5018 = 0, i5019 = 0;
+        int i5020 = 0, i5021 = 0, i5022 = 0, i5023 = 0, i5024 = 0, i5025 = 0, i5026 = 0, i5027 = 0, i5028 = 0, i5029 = 0;
+        int i5030 = 0, i5031 = 0, i5032 = 0, i5033 = 0, i5034 = 0, i5035 = 0, i5036 = 0, i5037 = 0, i5038 = 0, i5039 = 0;
+        int i5040 = 0, i5041 = 0, i5042 = 0, i5043 = 0, i5044 = 0, i5045 = 0, i5046 = 0, i5047 = 0, i5048 = 0, i5049 = 0;
+        int i5050 = 0, i5051 = 0, i5052 = 0, i5053 = 0, i5054 = 0, i5055 = 0, i5056 = 0, i5057 = 0, i5058 = 0, i5059 = 0;
+        int i5060 = 0, i5061 = 0, i5062 = 0, i5063 = 0, i5064 = 0, i5065 = 0, i5066 = 0, i5067 = 0, i5068 = 0, i5069 = 0;
+        int i5070 = 0, i5071 = 0, i5072 = 0, i5073 = 0, i5074 = 0, i5075 = 0, i5076 = 0, i5077 = 0, i5078 = 0, i5079 = 0;
+        int i5080 = 0, i5081 = 0, i5082 = 0, i5083 = 0, i5084 = 0, i5085 = 0, i5086 = 0, i5087 = 0, i5088 = 0, i5089 = 0;
+        int i5090 = 0, i5091 = 0, i5092 = 0, i5093 = 0, i5094 = 0, i5095 = 0, i5096 = 0, i5097 = 0, i5098 = 0, i5099 = 0;
+        int i5100 = 0, i5101 = 0, i5102 = 0, i5103 = 0, i5104 = 0, i5105 = 0, i5106 = 0, i5107 = 0, i5108 = 0, i5109 = 0;
+        int i5110 = 0, i5111 = 0, i5112 = 0, i5113 = 0, i5114 = 0, i5115 = 0, i5116 = 0, i5117 = 0, i5118 = 0, i5119 = 0;
+        int i5120 = 0, i5121 = 0, i5122 = 0, i5123 = 0, i5124 = 0, i5125 = 0, i5126 = 0, i5127 = 0, i5128 = 0, i5129 = 0;
+        int i5130 = 0, i5131 = 0, i5132 = 0, i5133 = 0, i5134 = 0, i5135 = 0, i5136 = 0, i5137 = 0, i5138 = 0, i5139 = 0;
+        int i5140 = 0, i5141 = 0, i5142 = 0, i5143 = 0, i5144 = 0, i5145 = 0, i5146 = 0, i5147 = 0, i5148 = 0, i5149 = 0;
+        int i5150 = 0, i5151 = 0, i5152 = 0, i5153 = 0, i5154 = 0, i5155 = 0, i5156 = 0, i5157 = 0, i5158 = 0, i5159 = 0;
+        int i5160 = 0, i5161 = 0, i5162 = 0, i5163 = 0, i5164 = 0, i5165 = 0, i5166 = 0, i5167 = 0, i5168 = 0, i5169 = 0;
+        int i5170 = 0, i5171 = 0, i5172 = 0, i5173 = 0, i5174 = 0, i5175 = 0, i5176 = 0, i5177 = 0, i5178 = 0, i5179 = 0;
+        int i5180 = 0, i5181 = 0, i5182 = 0, i5183 = 0, i5184 = 0, i5185 = 0, i5186 = 0, i5187 = 0, i5188 = 0, i5189 = 0;
+        int i5190 = 0, i5191 = 0, i5192 = 0, i5193 = 0, i5194 = 0, i5195 = 0, i5196 = 0, i5197 = 0, i5198 = 0, i5199 = 0;
+        int i5200 = 0, i5201 = 0, i5202 = 0, i5203 = 0, i5204 = 0, i5205 = 0, i5206 = 0, i5207 = 0, i5208 = 0, i5209 = 0;
+        int i5210 = 0, i5211 = 0, i5212 = 0, i5213 = 0, i5214 = 0, i5215 = 0, i5216 = 0, i5217 = 0, i5218 = 0, i5219 = 0;
+        int i5220 = 0, i5221 = 0, i5222 = 0, i5223 = 0, i5224 = 0, i5225 = 0, i5226 = 0, i5227 = 0, i5228 = 0, i5229 = 0;
+        int i5230 = 0, i5231 = 0, i5232 = 0, i5233 = 0, i5234 = 0, i5235 = 0, i5236 = 0, i5237 = 0, i5238 = 0, i5239 = 0;
+        int i5240 = 0, i5241 = 0, i5242 = 0, i5243 = 0, i5244 = 0, i5245 = 0, i5246 = 0, i5247 = 0, i5248 = 0, i5249 = 0;
+        int i5250 = 0, i5251 = 0, i5252 = 0, i5253 = 0, i5254 = 0, i5255 = 0, i5256 = 0, i5257 = 0, i5258 = 0, i5259 = 0;
+        int i5260 = 0, i5261 = 0, i5262 = 0, i5263 = 0, i5264 = 0, i5265 = 0, i5266 = 0, i5267 = 0, i5268 = 0, i5269 = 0;
+        int i5270 = 0, i5271 = 0, i5272 = 0, i5273 = 0, i5274 = 0, i5275 = 0, i5276 = 0, i5277 = 0, i5278 = 0, i5279 = 0;
+        int i5280 = 0, i5281 = 0, i5282 = 0, i5283 = 0, i5284 = 0, i5285 = 0, i5286 = 0, i5287 = 0, i5288 = 0, i5289 = 0;
+        int i5290 = 0, i5291 = 0, i5292 = 0, i5293 = 0, i5294 = 0, i5295 = 0, i5296 = 0, i5297 = 0, i5298 = 0, i5299 = 0;
+        int i5300 = 0, i5301 = 0, i5302 = 0, i5303 = 0, i5304 = 0, i5305 = 0, i5306 = 0, i5307 = 0, i5308 = 0, i5309 = 0;
+        int i5310 = 0, i5311 = 0, i5312 = 0, i5313 = 0, i5314 = 0, i5315 = 0, i5316 = 0, i5317 = 0, i5318 = 0, i5319 = 0;
+        int i5320 = 0, i5321 = 0, i5322 = 0, i5323 = 0, i5324 = 0, i5325 = 0, i5326 = 0, i5327 = 0, i5328 = 0, i5329 = 0;
+        int i5330 = 0, i5331 = 0, i5332 = 0, i5333 = 0, i5334 = 0, i5335 = 0, i5336 = 0, i5337 = 0, i5338 = 0, i5339 = 0;
+        int i5340 = 0, i5341 = 0, i5342 = 0, i5343 = 0, i5344 = 0, i5345 = 0, i5346 = 0, i5347 = 0, i5348 = 0, i5349 = 0;
+        int i5350 = 0, i5351 = 0, i5352 = 0, i5353 = 0, i5354 = 0, i5355 = 0, i5356 = 0, i5357 = 0, i5358 = 0, i5359 = 0;
+        int i5360 = 0, i5361 = 0, i5362 = 0, i5363 = 0, i5364 = 0, i5365 = 0, i5366 = 0, i5367 = 0, i5368 = 0, i5369 = 0;
+        int i5370 = 0, i5371 = 0, i5372 = 0, i5373 = 0, i5374 = 0, i5375 = 0, i5376 = 0, i5377 = 0, i5378 = 0, i5379 = 0;
+        int i5380 = 0, i5381 = 0, i5382 = 0, i5383 = 0, i5384 = 0, i5385 = 0, i5386 = 0, i5387 = 0, i5388 = 0, i5389 = 0;
+        int i5390 = 0, i5391 = 0, i5392 = 0, i5393 = 0, i5394 = 0, i5395 = 0, i5396 = 0, i5397 = 0, i5398 = 0, i5399 = 0;
+        int i5400 = 0, i5401 = 0, i5402 = 0, i5403 = 0, i5404 = 0, i5405 = 0, i5406 = 0, i5407 = 0, i5408 = 0, i5409 = 0;
+        int i5410 = 0, i5411 = 0, i5412 = 0, i5413 = 0, i5414 = 0, i5415 = 0, i5416 = 0, i5417 = 0, i5418 = 0, i5419 = 0;
+        int i5420 = 0, i5421 = 0, i5422 = 0, i5423 = 0, i5424 = 0, i5425 = 0, i5426 = 0, i5427 = 0, i5428 = 0, i5429 = 0;
+        int i5430 = 0, i5431 = 0, i5432 = 0, i5433 = 0, i5434 = 0, i5435 = 0, i5436 = 0, i5437 = 0, i5438 = 0, i5439 = 0;
+        int i5440 = 0, i5441 = 0, i5442 = 0, i5443 = 0, i5444 = 0, i5445 = 0, i5446 = 0, i5447 = 0, i5448 = 0, i5449 = 0;
+        int i5450 = 0, i5451 = 0, i5452 = 0, i5453 = 0, i5454 = 0, i5455 = 0, i5456 = 0, i5457 = 0, i5458 = 0, i5459 = 0;
+        int i5460 = 0, i5461 = 0, i5462 = 0, i5463 = 0, i5464 = 0, i5465 = 0, i5466 = 0, i5467 = 0, i5468 = 0, i5469 = 0;
+        int i5470 = 0, i5471 = 0, i5472 = 0, i5473 = 0, i5474 = 0, i5475 = 0, i5476 = 0, i5477 = 0, i5478 = 0, i5479 = 0;
+        int i5480 = 0, i5481 = 0, i5482 = 0, i5483 = 0, i5484 = 0, i5485 = 0, i5486 = 0, i5487 = 0, i5488 = 0, i5489 = 0;
+        int i5490 = 0, i5491 = 0, i5492 = 0, i5493 = 0, i5494 = 0, i5495 = 0, i5496 = 0, i5497 = 0, i5498 = 0, i5499 = 0;
+        int i5500 = 0, i5501 = 0, i5502 = 0, i5503 = 0, i5504 = 0, i5505 = 0, i5506 = 0, i5507 = 0, i5508 = 0, i5509 = 0;
+        int i5510 = 0, i5511 = 0, i5512 = 0, i5513 = 0, i5514 = 0, i5515 = 0, i5516 = 0, i5517 = 0, i5518 = 0, i5519 = 0;
+        int i5520 = 0, i5521 = 0, i5522 = 0, i5523 = 0, i5524 = 0, i5525 = 0, i5526 = 0, i5527 = 0, i5528 = 0, i5529 = 0;
+        int i5530 = 0, i5531 = 0, i5532 = 0, i5533 = 0, i5534 = 0, i5535 = 0, i5536 = 0, i5537 = 0, i5538 = 0, i5539 = 0;
+        int i5540 = 0, i5541 = 0, i5542 = 0, i5543 = 0, i5544 = 0, i5545 = 0, i5546 = 0, i5547 = 0, i5548 = 0, i5549 = 0;
+        int i5550 = 0, i5551 = 0, i5552 = 0, i5553 = 0, i5554 = 0, i5555 = 0, i5556 = 0, i5557 = 0, i5558 = 0, i5559 = 0;
+        int i5560 = 0, i5561 = 0, i5562 = 0, i5563 = 0, i5564 = 0, i5565 = 0, i5566 = 0, i5567 = 0, i5568 = 0, i5569 = 0;
+        int i5570 = 0, i5571 = 0, i5572 = 0, i5573 = 0, i5574 = 0, i5575 = 0, i5576 = 0, i5577 = 0, i5578 = 0, i5579 = 0;
+        int i5580 = 0, i5581 = 0, i5582 = 0, i5583 = 0, i5584 = 0, i5585 = 0, i5586 = 0, i5587 = 0, i5588 = 0, i5589 = 0;
+        int i5590 = 0, i5591 = 0, i5592 = 0, i5593 = 0, i5594 = 0, i5595 = 0, i5596 = 0, i5597 = 0, i5598 = 0, i5599 = 0;
+        int i5600 = 0, i5601 = 0, i5602 = 0, i5603 = 0, i5604 = 0, i5605 = 0, i5606 = 0, i5607 = 0, i5608 = 0, i5609 = 0;
+        int i5610 = 0, i5611 = 0, i5612 = 0, i5613 = 0, i5614 = 0, i5615 = 0, i5616 = 0, i5617 = 0, i5618 = 0, i5619 = 0;
+        int i5620 = 0, i5621 = 0, i5622 = 0, i5623 = 0, i5624 = 0, i5625 = 0, i5626 = 0, i5627 = 0, i5628 = 0, i5629 = 0;
+        int i5630 = 0, i5631 = 0, i5632 = 0, i5633 = 0, i5634 = 0, i5635 = 0, i5636 = 0, i5637 = 0, i5638 = 0, i5639 = 0;
+        int i5640 = 0, i5641 = 0, i5642 = 0, i5643 = 0, i5644 = 0, i5645 = 0, i5646 = 0, i5647 = 0, i5648 = 0, i5649 = 0;
+        int i5650 = 0, i5651 = 0, i5652 = 0, i5653 = 0, i5654 = 0, i5655 = 0, i5656 = 0, i5657 = 0, i5658 = 0, i5659 = 0;
+        int i5660 = 0, i5661 = 0, i5662 = 0, i5663 = 0, i5664 = 0, i5665 = 0, i5666 = 0, i5667 = 0, i5668 = 0, i5669 = 0;
+        int i5670 = 0, i5671 = 0, i5672 = 0, i5673 = 0, i5674 = 0, i5675 = 0, i5676 = 0, i5677 = 0, i5678 = 0, i5679 = 0;
+        int i5680 = 0, i5681 = 0, i5682 = 0, i5683 = 0, i5684 = 0, i5685 = 0, i5686 = 0, i5687 = 0, i5688 = 0, i5689 = 0;
+        int i5690 = 0, i5691 = 0, i5692 = 0, i5693 = 0, i5694 = 0, i5695 = 0, i5696 = 0, i5697 = 0, i5698 = 0, i5699 = 0;
+        int i5700 = 0, i5701 = 0, i5702 = 0, i5703 = 0, i5704 = 0, i5705 = 0, i5706 = 0, i5707 = 0, i5708 = 0, i5709 = 0;
+        int i5710 = 0, i5711 = 0, i5712 = 0, i5713 = 0, i5714 = 0, i5715 = 0, i5716 = 0, i5717 = 0, i5718 = 0, i5719 = 0;
+        int i5720 = 0, i5721 = 0, i5722 = 0, i5723 = 0, i5724 = 0, i5725 = 0, i5726 = 0, i5727 = 0, i5728 = 0, i5729 = 0;
+        int i5730 = 0, i5731 = 0, i5732 = 0, i5733 = 0, i5734 = 0, i5735 = 0, i5736 = 0, i5737 = 0, i5738 = 0, i5739 = 0;
+        int i5740 = 0, i5741 = 0, i5742 = 0, i5743 = 0, i5744 = 0, i5745 = 0, i5746 = 0, i5747 = 0, i5748 = 0, i5749 = 0;
+        int i5750 = 0, i5751 = 0, i5752 = 0, i5753 = 0, i5754 = 0, i5755 = 0, i5756 = 0, i5757 = 0, i5758 = 0, i5759 = 0;
+        int i5760 = 0, i5761 = 0, i5762 = 0, i5763 = 0, i5764 = 0, i5765 = 0, i5766 = 0, i5767 = 0, i5768 = 0, i5769 = 0;
+        int i5770 = 0, i5771 = 0, i5772 = 0, i5773 = 0, i5774 = 0, i5775 = 0, i5776 = 0, i5777 = 0, i5778 = 0, i5779 = 0;
+        int i5780 = 0, i5781 = 0, i5782 = 0, i5783 = 0, i5784 = 0, i5785 = 0, i5786 = 0, i5787 = 0, i5788 = 0, i5789 = 0;
+        int i5790 = 0, i5791 = 0, i5792 = 0, i5793 = 0, i5794 = 0, i5795 = 0, i5796 = 0, i5797 = 0, i5798 = 0, i5799 = 0;
+        int i5800 = 0, i5801 = 0, i5802 = 0, i5803 = 0, i5804 = 0, i5805 = 0, i5806 = 0, i5807 = 0, i5808 = 0, i5809 = 0;
+        int i5810 = 0, i5811 = 0, i5812 = 0, i5813 = 0, i5814 = 0, i5815 = 0, i5816 = 0, i5817 = 0, i5818 = 0, i5819 = 0;
+        int i5820 = 0, i5821 = 0, i5822 = 0, i5823 = 0, i5824 = 0, i5825 = 0, i5826 = 0, i5827 = 0, i5828 = 0, i5829 = 0;
+        int i5830 = 0, i5831 = 0, i5832 = 0, i5833 = 0, i5834 = 0, i5835 = 0, i5836 = 0, i5837 = 0, i5838 = 0, i5839 = 0;
+        int i5840 = 0, i5841 = 0, i5842 = 0, i5843 = 0, i5844 = 0, i5845 = 0, i5846 = 0, i5847 = 0, i5848 = 0, i5849 = 0;
+        int i5850 = 0, i5851 = 0, i5852 = 0, i5853 = 0, i5854 = 0, i5855 = 0, i5856 = 0, i5857 = 0, i5858 = 0, i5859 = 0;
+        int i5860 = 0, i5861 = 0, i5862 = 0, i5863 = 0, i5864 = 0, i5865 = 0, i5866 = 0, i5867 = 0, i5868 = 0, i5869 = 0;
+        int i5870 = 0, i5871 = 0, i5872 = 0, i5873 = 0, i5874 = 0, i5875 = 0, i5876 = 0, i5877 = 0, i5878 = 0, i5879 = 0;
+        int i5880 = 0, i5881 = 0, i5882 = 0, i5883 = 0, i5884 = 0, i5885 = 0, i5886 = 0, i5887 = 0, i5888 = 0, i5889 = 0;
+        int i5890 = 0, i5891 = 0, i5892 = 0, i5893 = 0, i5894 = 0, i5895 = 0, i5896 = 0, i5897 = 0, i5898 = 0, i5899 = 0;
+        int i5900 = 0, i5901 = 0, i5902 = 0, i5903 = 0, i5904 = 0, i5905 = 0, i5906 = 0, i5907 = 0, i5908 = 0, i5909 = 0;
+        int i5910 = 0, i5911 = 0, i5912 = 0, i5913 = 0, i5914 = 0, i5915 = 0, i5916 = 0, i5917 = 0, i5918 = 0, i5919 = 0;
+        int i5920 = 0, i5921 = 0, i5922 = 0, i5923 = 0, i5924 = 0, i5925 = 0, i5926 = 0, i5927 = 0, i5928 = 0, i5929 = 0;
+        int i5930 = 0, i5931 = 0, i5932 = 0, i5933 = 0, i5934 = 0, i5935 = 0, i5936 = 0, i5937 = 0, i5938 = 0, i5939 = 0;
+        int i5940 = 0, i5941 = 0, i5942 = 0, i5943 = 0, i5944 = 0, i5945 = 0, i5946 = 0, i5947 = 0, i5948 = 0, i5949 = 0;
+        int i5950 = 0, i5951 = 0, i5952 = 0, i5953 = 0, i5954 = 0, i5955 = 0, i5956 = 0, i5957 = 0, i5958 = 0, i5959 = 0;
+        int i5960 = 0, i5961 = 0, i5962 = 0, i5963 = 0, i5964 = 0, i5965 = 0, i5966 = 0, i5967 = 0, i5968 = 0, i5969 = 0;
+        int i5970 = 0, i5971 = 0, i5972 = 0, i5973 = 0, i5974 = 0, i5975 = 0, i5976 = 0, i5977 = 0, i5978 = 0, i5979 = 0;
+        int i5980 = 0, i5981 = 0, i5982 = 0, i5983 = 0, i5984 = 0, i5985 = 0, i5986 = 0, i5987 = 0, i5988 = 0, i5989 = 0;
+        int i5990 = 0, i5991 = 0, i5992 = 0, i5993 = 0, i5994 = 0, i5995 = 0, i5996 = 0, i5997 = 0, i5998 = 0, i5999 = 0;
+        int i6000 = 0, i6001 = 0, i6002 = 0, i6003 = 0, i6004 = 0, i6005 = 0, i6006 = 0, i6007 = 0, i6008 = 0, i6009 = 0;
+        int i6010 = 0, i6011 = 0, i6012 = 0, i6013 = 0, i6014 = 0, i6015 = 0, i6016 = 0, i6017 = 0, i6018 = 0, i6019 = 0;
+        int i6020 = 0, i6021 = 0, i6022 = 0, i6023 = 0, i6024 = 0, i6025 = 0, i6026 = 0, i6027 = 0, i6028 = 0, i6029 = 0;
+        int i6030 = 0, i6031 = 0, i6032 = 0, i6033 = 0, i6034 = 0, i6035 = 0, i6036 = 0, i6037 = 0, i6038 = 0, i6039 = 0;
+        int i6040 = 0, i6041 = 0, i6042 = 0, i6043 = 0, i6044 = 0, i6045 = 0, i6046 = 0, i6047 = 0, i6048 = 0, i6049 = 0;
+        int i6050 = 0, i6051 = 0, i6052 = 0, i6053 = 0, i6054 = 0, i6055 = 0, i6056 = 0, i6057 = 0, i6058 = 0, i6059 = 0;
+        int i6060 = 0, i6061 = 0, i6062 = 0, i6063 = 0, i6064 = 0, i6065 = 0, i6066 = 0, i6067 = 0, i6068 = 0, i6069 = 0;
+        int i6070 = 0, i6071 = 0, i6072 = 0, i6073 = 0, i6074 = 0, i6075 = 0, i6076 = 0, i6077 = 0, i6078 = 0, i6079 = 0;
+        int i6080 = 0, i6081 = 0, i6082 = 0, i6083 = 0, i6084 = 0, i6085 = 0, i6086 = 0, i6087 = 0, i6088 = 0, i6089 = 0;
+        int i6090 = 0, i6091 = 0, i6092 = 0, i6093 = 0, i6094 = 0, i6095 = 0, i6096 = 0, i6097 = 0, i6098 = 0, i6099 = 0;
+        int i6100 = 0, i6101 = 0, i6102 = 0, i6103 = 0, i6104 = 0, i6105 = 0, i6106 = 0, i6107 = 0, i6108 = 0, i6109 = 0;
+        int i6110 = 0, i6111 = 0, i6112 = 0, i6113 = 0, i6114 = 0, i6115 = 0, i6116 = 0, i6117 = 0, i6118 = 0, i6119 = 0;
+        int i6120 = 0, i6121 = 0, i6122 = 0, i6123 = 0, i6124 = 0, i6125 = 0, i6126 = 0, i6127 = 0, i6128 = 0, i6129 = 0;
+        int i6130 = 0, i6131 = 0, i6132 = 0, i6133 = 0, i6134 = 0, i6135 = 0, i6136 = 0, i6137 = 0, i6138 = 0, i6139 = 0;
+        int i6140 = 0, i6141 = 0, i6142 = 0, i6143 = 0, i6144 = 0, i6145 = 0, i6146 = 0, i6147 = 0, i6148 = 0, i6149 = 0;
+        int i6150 = 0, i6151 = 0, i6152 = 0, i6153 = 0, i6154 = 0, i6155 = 0, i6156 = 0, i6157 = 0, i6158 = 0, i6159 = 0;
+        int i6160 = 0, i6161 = 0, i6162 = 0, i6163 = 0, i6164 = 0, i6165 = 0, i6166 = 0, i6167 = 0, i6168 = 0, i6169 = 0;
+        int i6170 = 0, i6171 = 0, i6172 = 0, i6173 = 0, i6174 = 0, i6175 = 0, i6176 = 0, i6177 = 0, i6178 = 0, i6179 = 0;
+        int i6180 = 0, i6181 = 0, i6182 = 0, i6183 = 0, i6184 = 0, i6185 = 0, i6186 = 0, i6187 = 0, i6188 = 0, i6189 = 0;
+        int i6190 = 0, i6191 = 0, i6192 = 0, i6193 = 0, i6194 = 0, i6195 = 0, i6196 = 0, i6197 = 0, i6198 = 0, i6199 = 0;
+        int i6200 = 0, i6201 = 0, i6202 = 0, i6203 = 0, i6204 = 0, i6205 = 0, i6206 = 0, i6207 = 0, i6208 = 0, i6209 = 0;
+        int i6210 = 0, i6211 = 0, i6212 = 0, i6213 = 0, i6214 = 0, i6215 = 0, i6216 = 0, i6217 = 0, i6218 = 0, i6219 = 0;
+        int i6220 = 0, i6221 = 0, i6222 = 0, i6223 = 0, i6224 = 0, i6225 = 0, i6226 = 0, i6227 = 0, i6228 = 0, i6229 = 0;
+        int i6230 = 0, i6231 = 0, i6232 = 0, i6233 = 0, i6234 = 0, i6235 = 0, i6236 = 0, i6237 = 0, i6238 = 0, i6239 = 0;
+        int i6240 = 0, i6241 = 0, i6242 = 0, i6243 = 0, i6244 = 0, i6245 = 0, i6246 = 0, i6247 = 0, i6248 = 0, i6249 = 0;
+        int i6250 = 0, i6251 = 0, i6252 = 0, i6253 = 0, i6254 = 0, i6255 = 0, i6256 = 0, i6257 = 0, i6258 = 0, i6259 = 0;
+        int i6260 = 0, i6261 = 0, i6262 = 0, i6263 = 0, i6264 = 0, i6265 = 0, i6266 = 0, i6267 = 0, i6268 = 0, i6269 = 0;
+        int i6270 = 0, i6271 = 0, i6272 = 0, i6273 = 0, i6274 = 0, i6275 = 0, i6276 = 0, i6277 = 0, i6278 = 0, i6279 = 0;
+        int i6280 = 0, i6281 = 0, i6282 = 0, i6283 = 0, i6284 = 0, i6285 = 0, i6286 = 0, i6287 = 0, i6288 = 0, i6289 = 0;
+        int i6290 = 0, i6291 = 0, i6292 = 0, i6293 = 0, i6294 = 0, i6295 = 0, i6296 = 0, i6297 = 0, i6298 = 0, i6299 = 0;
+        int i6300 = 0, i6301 = 0, i6302 = 0, i6303 = 0, i6304 = 0, i6305 = 0, i6306 = 0, i6307 = 0, i6308 = 0, i6309 = 0;
+        int i6310 = 0, i6311 = 0, i6312 = 0, i6313 = 0, i6314 = 0, i6315 = 0, i6316 = 0, i6317 = 0, i6318 = 0, i6319 = 0;
+        int i6320 = 0, i6321 = 0, i6322 = 0, i6323 = 0, i6324 = 0, i6325 = 0, i6326 = 0, i6327 = 0, i6328 = 0, i6329 = 0;
+        int i6330 = 0, i6331 = 0, i6332 = 0, i6333 = 0, i6334 = 0, i6335 = 0, i6336 = 0, i6337 = 0, i6338 = 0, i6339 = 0;
+        int i6340 = 0, i6341 = 0, i6342 = 0, i6343 = 0, i6344 = 0, i6345 = 0, i6346 = 0, i6347 = 0, i6348 = 0, i6349 = 0;
+        int i6350 = 0, i6351 = 0, i6352 = 0, i6353 = 0, i6354 = 0, i6355 = 0, i6356 = 0, i6357 = 0, i6358 = 0, i6359 = 0;
+        int i6360 = 0, i6361 = 0, i6362 = 0, i6363 = 0, i6364 = 0, i6365 = 0, i6366 = 0, i6367 = 0, i6368 = 0, i6369 = 0;
+        int i6370 = 0, i6371 = 0, i6372 = 0, i6373 = 0, i6374 = 0, i6375 = 0, i6376 = 0, i6377 = 0, i6378 = 0, i6379 = 0;
+        int i6380 = 0, i6381 = 0, i6382 = 0, i6383 = 0, i6384 = 0, i6385 = 0, i6386 = 0, i6387 = 0, i6388 = 0, i6389 = 0;
+        int i6390 = 0, i6391 = 0, i6392 = 0, i6393 = 0, i6394 = 0, i6395 = 0, i6396 = 0, i6397 = 0, i6398 = 0, i6399 = 0;
+        int i6400 = 0, i6401 = 0, i6402 = 0, i6403 = 0, i6404 = 0, i6405 = 0, i6406 = 0, i6407 = 0, i6408 = 0, i6409 = 0;
+        int i6410 = 0, i6411 = 0, i6412 = 0, i6413 = 0, i6414 = 0, i6415 = 0, i6416 = 0, i6417 = 0, i6418 = 0, i6419 = 0;
+        int i6420 = 0, i6421 = 0, i6422 = 0, i6423 = 0, i6424 = 0, i6425 = 0, i6426 = 0, i6427 = 0, i6428 = 0, i6429 = 0;
+        int i6430 = 0, i6431 = 0, i6432 = 0, i6433 = 0, i6434 = 0, i6435 = 0, i6436 = 0, i6437 = 0, i6438 = 0, i6439 = 0;
+        int i6440 = 0, i6441 = 0, i6442 = 0, i6443 = 0, i6444 = 0, i6445 = 0, i6446 = 0, i6447 = 0, i6448 = 0, i6449 = 0;
+        int i6450 = 0, i6451 = 0, i6452 = 0, i6453 = 0, i6454 = 0, i6455 = 0, i6456 = 0, i6457 = 0, i6458 = 0, i6459 = 0;
+        int i6460 = 0, i6461 = 0, i6462 = 0, i6463 = 0, i6464 = 0, i6465 = 0, i6466 = 0, i6467 = 0, i6468 = 0, i6469 = 0;
+        int i6470 = 0, i6471 = 0, i6472 = 0, i6473 = 0, i6474 = 0, i6475 = 0, i6476 = 0, i6477 = 0, i6478 = 0, i6479 = 0;
+        int i6480 = 0, i6481 = 0, i6482 = 0, i6483 = 0, i6484 = 0, i6485 = 0, i6486 = 0, i6487 = 0, i6488 = 0, i6489 = 0;
+        int i6490 = 0, i6491 = 0, i6492 = 0, i6493 = 0, i6494 = 0, i6495 = 0, i6496 = 0, i6497 = 0, i6498 = 0, i6499 = 0;
+        int i6500 = 0, i6501 = 0, i6502 = 0, i6503 = 0, i6504 = 0, i6505 = 0, i6506 = 0, i6507 = 0, i6508 = 0, i6509 = 0;
+        int i6510 = 0, i6511 = 0, i6512 = 0, i6513 = 0, i6514 = 0, i6515 = 0, i6516 = 0, i6517 = 0, i6518 = 0, i6519 = 0;
+        int i6520 = 0, i6521 = 0, i6522 = 0, i6523 = 0, i6524 = 0, i6525 = 0, i6526 = 0, i6527 = 0, i6528 = 0, i6529 = 0;
+        int i6530 = 0, i6531 = 0, i6532 = 0, i6533 = 0, i6534 = 0, i6535 = 0, i6536 = 0, i6537 = 0, i6538 = 0, i6539 = 0;
+        int i6540 = 0, i6541 = 0, i6542 = 0, i6543 = 0, i6544 = 0, i6545 = 0, i6546 = 0, i6547 = 0, i6548 = 0, i6549 = 0;
+        int i6550 = 0, i6551 = 0, i6552 = 0, i6553 = 0, i6554 = 0, i6555 = 0, i6556 = 0, i6557 = 0, i6558 = 0, i6559 = 0;
+        int i6560 = 0, i6561 = 0, i6562 = 0, i6563 = 0, i6564 = 0, i6565 = 0, i6566 = 0, i6567 = 0, i6568 = 0, i6569 = 0;
+        int i6570 = 0, i6571 = 0, i6572 = 0, i6573 = 0, i6574 = 0, i6575 = 0, i6576 = 0, i6577 = 0, i6578 = 0, i6579 = 0;
+        int i6580 = 0, i6581 = 0, i6582 = 0, i6583 = 0, i6584 = 0, i6585 = 0, i6586 = 0, i6587 = 0, i6588 = 0, i6589 = 0;
+        int i6590 = 0, i6591 = 0, i6592 = 0, i6593 = 0, i6594 = 0, i6595 = 0, i6596 = 0, i6597 = 0, i6598 = 0, i6599 = 0;
+        int i6600 = 0, i6601 = 0, i6602 = 0, i6603 = 0, i6604 = 0, i6605 = 0, i6606 = 0, i6607 = 0, i6608 = 0, i6609 = 0;
+        int i6610 = 0, i6611 = 0, i6612 = 0, i6613 = 0, i6614 = 0, i6615 = 0, i6616 = 0, i6617 = 0, i6618 = 0, i6619 = 0;
+        int i6620 = 0, i6621 = 0, i6622 = 0, i6623 = 0, i6624 = 0, i6625 = 0, i6626 = 0, i6627 = 0, i6628 = 0, i6629 = 0;
+        int i6630 = 0, i6631 = 0, i6632 = 0, i6633 = 0, i6634 = 0, i6635 = 0, i6636 = 0, i6637 = 0, i6638 = 0, i6639 = 0;
+        int i6640 = 0, i6641 = 0, i6642 = 0, i6643 = 0, i6644 = 0, i6645 = 0, i6646 = 0, i6647 = 0, i6648 = 0, i6649 = 0;
+        int i6650 = 0, i6651 = 0, i6652 = 0, i6653 = 0, i6654 = 0, i6655 = 0, i6656 = 0, i6657 = 0, i6658 = 0, i6659 = 0;
+        int i6660 = 0, i6661 = 0, i6662 = 0, i6663 = 0, i6664 = 0, i6665 = 0, i6666 = 0, i6667 = 0, i6668 = 0, i6669 = 0;
+        int i6670 = 0, i6671 = 0, i6672 = 0, i6673 = 0, i6674 = 0, i6675 = 0, i6676 = 0, i6677 = 0, i6678 = 0, i6679 = 0;
+        int i6680 = 0, i6681 = 0, i6682 = 0, i6683 = 0, i6684 = 0, i6685 = 0, i6686 = 0, i6687 = 0, i6688 = 0, i6689 = 0;
+        int i6690 = 0, i6691 = 0, i6692 = 0, i6693 = 0, i6694 = 0, i6695 = 0, i6696 = 0, i6697 = 0, i6698 = 0, i6699 = 0;
+        int i6700 = 0, i6701 = 0, i6702 = 0, i6703 = 0, i6704 = 0, i6705 = 0, i6706 = 0, i6707 = 0, i6708 = 0, i6709 = 0;
+        int i6710 = 0, i6711 = 0, i6712 = 0, i6713 = 0, i6714 = 0, i6715 = 0, i6716 = 0, i6717 = 0, i6718 = 0, i6719 = 0;
+        int i6720 = 0, i6721 = 0, i6722 = 0, i6723 = 0, i6724 = 0, i6725 = 0, i6726 = 0, i6727 = 0, i6728 = 0, i6729 = 0;
+        int i6730 = 0, i6731 = 0, i6732 = 0, i6733 = 0, i6734 = 0, i6735 = 0, i6736 = 0, i6737 = 0, i6738 = 0, i6739 = 0;
+        int i6740 = 0, i6741 = 0, i6742 = 0, i6743 = 0, i6744 = 0, i6745 = 0, i6746 = 0, i6747 = 0, i6748 = 0, i6749 = 0;
+        int i6750 = 0, i6751 = 0, i6752 = 0, i6753 = 0, i6754 = 0, i6755 = 0, i6756 = 0, i6757 = 0, i6758 = 0, i6759 = 0;
+        int i6760 = 0, i6761 = 0, i6762 = 0, i6763 = 0, i6764 = 0, i6765 = 0, i6766 = 0, i6767 = 0, i6768 = 0, i6769 = 0;
+        int i6770 = 0, i6771 = 0, i6772 = 0, i6773 = 0, i6774 = 0, i6775 = 0, i6776 = 0, i6777 = 0, i6778 = 0, i6779 = 0;
+        int i6780 = 0, i6781 = 0, i6782 = 0, i6783 = 0, i6784 = 0, i6785 = 0, i6786 = 0, i6787 = 0, i6788 = 0, i6789 = 0;
+        int i6790 = 0, i6791 = 0, i6792 = 0, i6793 = 0, i6794 = 0, i6795 = 0, i6796 = 0, i6797 = 0, i6798 = 0, i6799 = 0;
+        int i6800 = 0, i6801 = 0, i6802 = 0, i6803 = 0, i6804 = 0, i6805 = 0, i6806 = 0, i6807 = 0, i6808 = 0, i6809 = 0;
+        int i6810 = 0, i6811 = 0, i6812 = 0, i6813 = 0, i6814 = 0, i6815 = 0, i6816 = 0, i6817 = 0, i6818 = 0, i6819 = 0;
+        int i6820 = 0, i6821 = 0, i6822 = 0, i6823 = 0, i6824 = 0, i6825 = 0, i6826 = 0, i6827 = 0, i6828 = 0, i6829 = 0;
+        int i6830 = 0, i6831 = 0, i6832 = 0, i6833 = 0, i6834 = 0, i6835 = 0, i6836 = 0, i6837 = 0, i6838 = 0, i6839 = 0;
+        int i6840 = 0, i6841 = 0, i6842 = 0, i6843 = 0, i6844 = 0, i6845 = 0, i6846 = 0, i6847 = 0, i6848 = 0, i6849 = 0;
+        int i6850 = 0, i6851 = 0, i6852 = 0, i6853 = 0, i6854 = 0, i6855 = 0, i6856 = 0, i6857 = 0, i6858 = 0, i6859 = 0;
+        int i6860 = 0, i6861 = 0, i6862 = 0, i6863 = 0, i6864 = 0, i6865 = 0, i6866 = 0, i6867 = 0, i6868 = 0, i6869 = 0;
+        int i6870 = 0, i6871 = 0, i6872 = 0, i6873 = 0, i6874 = 0, i6875 = 0, i6876 = 0, i6877 = 0, i6878 = 0, i6879 = 0;
+        int i6880 = 0, i6881 = 0, i6882 = 0, i6883 = 0, i6884 = 0, i6885 = 0, i6886 = 0, i6887 = 0, i6888 = 0, i6889 = 0;
+        int i6890 = 0, i6891 = 0, i6892 = 0, i6893 = 0, i6894 = 0, i6895 = 0, i6896 = 0, i6897 = 0, i6898 = 0, i6899 = 0;
+        int i6900 = 0, i6901 = 0, i6902 = 0, i6903 = 0, i6904 = 0, i6905 = 0, i6906 = 0, i6907 = 0, i6908 = 0, i6909 = 0;
+        int i6910 = 0, i6911 = 0, i6912 = 0, i6913 = 0, i6914 = 0, i6915 = 0, i6916 = 0, i6917 = 0, i6918 = 0, i6919 = 0;
+        int i6920 = 0, i6921 = 0, i6922 = 0, i6923 = 0, i6924 = 0, i6925 = 0, i6926 = 0, i6927 = 0, i6928 = 0, i6929 = 0;
+        int i6930 = 0, i6931 = 0, i6932 = 0, i6933 = 0, i6934 = 0, i6935 = 0, i6936 = 0, i6937 = 0, i6938 = 0, i6939 = 0;
+        int i6940 = 0, i6941 = 0, i6942 = 0, i6943 = 0, i6944 = 0, i6945 = 0, i6946 = 0, i6947 = 0, i6948 = 0, i6949 = 0;
+        int i6950 = 0, i6951 = 0, i6952 = 0, i6953 = 0, i6954 = 0, i6955 = 0, i6956 = 0, i6957 = 0, i6958 = 0, i6959 = 0;
+        int i6960 = 0, i6961 = 0, i6962 = 0, i6963 = 0, i6964 = 0, i6965 = 0, i6966 = 0, i6967 = 0, i6968 = 0, i6969 = 0;
+        int i6970 = 0, i6971 = 0, i6972 = 0, i6973 = 0, i6974 = 0, i6975 = 0, i6976 = 0, i6977 = 0, i6978 = 0, i6979 = 0;
+        int i6980 = 0, i6981 = 0, i6982 = 0, i6983 = 0, i6984 = 0, i6985 = 0, i6986 = 0, i6987 = 0, i6988 = 0, i6989 = 0;
+        int i6990 = 0, i6991 = 0, i6992 = 0, i6993 = 0, i6994 = 0, i6995 = 0, i6996 = 0, i6997 = 0, i6998 = 0, i6999 = 0;
+        int i7000 = 0, i7001 = 0, i7002 = 0, i7003 = 0, i7004 = 0, i7005 = 0, i7006 = 0, i7007 = 0, i7008 = 0, i7009 = 0;
+        int i7010 = 0, i7011 = 0, i7012 = 0, i7013 = 0, i7014 = 0, i7015 = 0, i7016 = 0, i7017 = 0, i7018 = 0, i7019 = 0;
+        int i7020 = 0, i7021 = 0, i7022 = 0, i7023 = 0, i7024 = 0, i7025 = 0, i7026 = 0, i7027 = 0, i7028 = 0, i7029 = 0;
+        int i7030 = 0, i7031 = 0, i7032 = 0, i7033 = 0, i7034 = 0, i7035 = 0, i7036 = 0, i7037 = 0, i7038 = 0, i7039 = 0;
+        int i7040 = 0, i7041 = 0, i7042 = 0, i7043 = 0, i7044 = 0, i7045 = 0, i7046 = 0, i7047 = 0, i7048 = 0, i7049 = 0;
+        int i7050 = 0, i7051 = 0, i7052 = 0, i7053 = 0, i7054 = 0, i7055 = 0, i7056 = 0, i7057 = 0, i7058 = 0, i7059 = 0;
+        int i7060 = 0, i7061 = 0, i7062 = 0, i7063 = 0, i7064 = 0, i7065 = 0, i7066 = 0, i7067 = 0, i7068 = 0, i7069 = 0;
+        int i7070 = 0, i7071 = 0, i7072 = 0, i7073 = 0, i7074 = 0, i7075 = 0, i7076 = 0, i7077 = 0, i7078 = 0, i7079 = 0;
+        int i7080 = 0, i7081 = 0, i7082 = 0, i7083 = 0, i7084 = 0, i7085 = 0, i7086 = 0, i7087 = 0, i7088 = 0, i7089 = 0;
+        int i7090 = 0, i7091 = 0, i7092 = 0, i7093 = 0, i7094 = 0, i7095 = 0, i7096 = 0, i7097 = 0, i7098 = 0, i7099 = 0;
+        int i7100 = 0, i7101 = 0, i7102 = 0, i7103 = 0, i7104 = 0, i7105 = 0, i7106 = 0, i7107 = 0, i7108 = 0, i7109 = 0;
+        int i7110 = 0, i7111 = 0, i7112 = 0, i7113 = 0, i7114 = 0, i7115 = 0, i7116 = 0, i7117 = 0, i7118 = 0, i7119 = 0;
+        int i7120 = 0, i7121 = 0, i7122 = 0, i7123 = 0, i7124 = 0, i7125 = 0, i7126 = 0, i7127 = 0, i7128 = 0, i7129 = 0;
+        int i7130 = 0, i7131 = 0, i7132 = 0, i7133 = 0, i7134 = 0, i7135 = 0, i7136 = 0, i7137 = 0, i7138 = 0, i7139 = 0;
+        int i7140 = 0, i7141 = 0, i7142 = 0, i7143 = 0, i7144 = 0, i7145 = 0, i7146 = 0, i7147 = 0, i7148 = 0, i7149 = 0;
+        int i7150 = 0, i7151 = 0, i7152 = 0, i7153 = 0, i7154 = 0, i7155 = 0, i7156 = 0, i7157 = 0, i7158 = 0, i7159 = 0;
+        int i7160 = 0, i7161 = 0, i7162 = 0, i7163 = 0, i7164 = 0, i7165 = 0, i7166 = 0, i7167 = 0, i7168 = 0, i7169 = 0;
+        int i7170 = 0, i7171 = 0, i7172 = 0, i7173 = 0, i7174 = 0, i7175 = 0, i7176 = 0, i7177 = 0, i7178 = 0, i7179 = 0;
+        int i7180 = 0, i7181 = 0, i7182 = 0, i7183 = 0, i7184 = 0, i7185 = 0, i7186 = 0, i7187 = 0, i7188 = 0, i7189 = 0;
+        int i7190 = 0, i7191 = 0, i7192 = 0, i7193 = 0, i7194 = 0, i7195 = 0, i7196 = 0, i7197 = 0, i7198 = 0, i7199 = 0;
+        int i7200 = 0, i7201 = 0, i7202 = 0, i7203 = 0, i7204 = 0, i7205 = 0, i7206 = 0, i7207 = 0, i7208 = 0, i7209 = 0;
+        int i7210 = 0, i7211 = 0, i7212 = 0, i7213 = 0, i7214 = 0, i7215 = 0, i7216 = 0, i7217 = 0, i7218 = 0, i7219 = 0;
+        int i7220 = 0, i7221 = 0, i7222 = 0, i7223 = 0, i7224 = 0, i7225 = 0, i7226 = 0, i7227 = 0, i7228 = 0, i7229 = 0;
+        int i7230 = 0, i7231 = 0, i7232 = 0, i7233 = 0, i7234 = 0, i7235 = 0, i7236 = 0, i7237 = 0, i7238 = 0, i7239 = 0;
+        int i7240 = 0, i7241 = 0, i7242 = 0, i7243 = 0, i7244 = 0, i7245 = 0, i7246 = 0, i7247 = 0, i7248 = 0, i7249 = 0;
+        int i7250 = 0, i7251 = 0, i7252 = 0, i7253 = 0, i7254 = 0, i7255 = 0, i7256 = 0, i7257 = 0, i7258 = 0, i7259 = 0;
+        int i7260 = 0, i7261 = 0, i7262 = 0, i7263 = 0, i7264 = 0, i7265 = 0, i7266 = 0, i7267 = 0, i7268 = 0, i7269 = 0;
+        int i7270 = 0, i7271 = 0, i7272 = 0, i7273 = 0, i7274 = 0, i7275 = 0, i7276 = 0, i7277 = 0, i7278 = 0, i7279 = 0;
+        int i7280 = 0, i7281 = 0, i7282 = 0, i7283 = 0, i7284 = 0, i7285 = 0, i7286 = 0, i7287 = 0, i7288 = 0, i7289 = 0;
+        int i7290 = 0, i7291 = 0, i7292 = 0, i7293 = 0, i7294 = 0, i7295 = 0, i7296 = 0, i7297 = 0, i7298 = 0, i7299 = 0;
+        int i7300 = 0, i7301 = 0, i7302 = 0, i7303 = 0, i7304 = 0, i7305 = 0, i7306 = 0, i7307 = 0, i7308 = 0, i7309 = 0;
+        int i7310 = 0, i7311 = 0, i7312 = 0, i7313 = 0, i7314 = 0, i7315 = 0, i7316 = 0, i7317 = 0, i7318 = 0, i7319 = 0;
+        int i7320 = 0, i7321 = 0, i7322 = 0, i7323 = 0, i7324 = 0, i7325 = 0, i7326 = 0, i7327 = 0, i7328 = 0, i7329 = 0;
+        int i7330 = 0, i7331 = 0, i7332 = 0, i7333 = 0, i7334 = 0, i7335 = 0, i7336 = 0, i7337 = 0, i7338 = 0, i7339 = 0;
+        int i7340 = 0, i7341 = 0, i7342 = 0, i7343 = 0, i7344 = 0, i7345 = 0, i7346 = 0, i7347 = 0, i7348 = 0, i7349 = 0;
+        int i7350 = 0, i7351 = 0, i7352 = 0, i7353 = 0, i7354 = 0, i7355 = 0, i7356 = 0, i7357 = 0, i7358 = 0, i7359 = 0;
+        int i7360 = 0, i7361 = 0, i7362 = 0, i7363 = 0, i7364 = 0, i7365 = 0, i7366 = 0, i7367 = 0, i7368 = 0, i7369 = 0;
+        int i7370 = 0, i7371 = 0, i7372 = 0, i7373 = 0, i7374 = 0, i7375 = 0, i7376 = 0, i7377 = 0, i7378 = 0, i7379 = 0;
+        int i7380 = 0, i7381 = 0, i7382 = 0, i7383 = 0, i7384 = 0, i7385 = 0, i7386 = 0, i7387 = 0, i7388 = 0, i7389 = 0;
+        int i7390 = 0, i7391 = 0, i7392 = 0, i7393 = 0, i7394 = 0, i7395 = 0, i7396 = 0, i7397 = 0, i7398 = 0, i7399 = 0;
+        int i7400 = 0, i7401 = 0, i7402 = 0, i7403 = 0, i7404 = 0, i7405 = 0, i7406 = 0, i7407 = 0, i7408 = 0, i7409 = 0;
+        int i7410 = 0, i7411 = 0, i7412 = 0, i7413 = 0, i7414 = 0, i7415 = 0, i7416 = 0, i7417 = 0, i7418 = 0, i7419 = 0;
+        int i7420 = 0, i7421 = 0, i7422 = 0, i7423 = 0, i7424 = 0, i7425 = 0, i7426 = 0, i7427 = 0, i7428 = 0, i7429 = 0;
+        int i7430 = 0, i7431 = 0, i7432 = 0, i7433 = 0, i7434 = 0, i7435 = 0, i7436 = 0, i7437 = 0, i7438 = 0, i7439 = 0;
+        int i7440 = 0, i7441 = 0, i7442 = 0, i7443 = 0, i7444 = 0, i7445 = 0, i7446 = 0, i7447 = 0, i7448 = 0, i7449 = 0;
+        int i7450 = 0, i7451 = 0, i7452 = 0, i7453 = 0, i7454 = 0, i7455 = 0, i7456 = 0, i7457 = 0, i7458 = 0, i7459 = 0;
+        int i7460 = 0, i7461 = 0, i7462 = 0, i7463 = 0, i7464 = 0, i7465 = 0, i7466 = 0, i7467 = 0, i7468 = 0, i7469 = 0;
+        int i7470 = 0, i7471 = 0, i7472 = 0, i7473 = 0, i7474 = 0, i7475 = 0, i7476 = 0, i7477 = 0, i7478 = 0, i7479 = 0;
+        int i7480 = 0, i7481 = 0, i7482 = 0, i7483 = 0, i7484 = 0, i7485 = 0, i7486 = 0, i7487 = 0, i7488 = 0, i7489 = 0;
+        int i7490 = 0, i7491 = 0, i7492 = 0, i7493 = 0, i7494 = 0, i7495 = 0, i7496 = 0, i7497 = 0, i7498 = 0, i7499 = 0;
+        int i7500 = 0, i7501 = 0, i7502 = 0, i7503 = 0, i7504 = 0, i7505 = 0, i7506 = 0, i7507 = 0, i7508 = 0, i7509 = 0;
+        int i7510 = 0, i7511 = 0, i7512 = 0, i7513 = 0, i7514 = 0, i7515 = 0, i7516 = 0, i7517 = 0, i7518 = 0, i7519 = 0;
+        int i7520 = 0, i7521 = 0, i7522 = 0, i7523 = 0, i7524 = 0, i7525 = 0, i7526 = 0, i7527 = 0, i7528 = 0, i7529 = 0;
+        int i7530 = 0, i7531 = 0, i7532 = 0, i7533 = 0, i7534 = 0, i7535 = 0, i7536 = 0, i7537 = 0, i7538 = 0, i7539 = 0;
+        int i7540 = 0, i7541 = 0, i7542 = 0, i7543 = 0, i7544 = 0, i7545 = 0, i7546 = 0, i7547 = 0, i7548 = 0, i7549 = 0;
+        int i7550 = 0, i7551 = 0, i7552 = 0, i7553 = 0, i7554 = 0, i7555 = 0, i7556 = 0, i7557 = 0, i7558 = 0, i7559 = 0;
+        int i7560 = 0, i7561 = 0, i7562 = 0, i7563 = 0, i7564 = 0, i7565 = 0, i7566 = 0, i7567 = 0, i7568 = 0, i7569 = 0;
+        int i7570 = 0, i7571 = 0, i7572 = 0, i7573 = 0, i7574 = 0, i7575 = 0, i7576 = 0, i7577 = 0, i7578 = 0, i7579 = 0;
+        int i7580 = 0, i7581 = 0, i7582 = 0, i7583 = 0, i7584 = 0, i7585 = 0, i7586 = 0, i7587 = 0, i7588 = 0, i7589 = 0;
+        int i7590 = 0, i7591 = 0, i7592 = 0, i7593 = 0, i7594 = 0, i7595 = 0, i7596 = 0, i7597 = 0, i7598 = 0, i7599 = 0;
+        int i7600 = 0, i7601 = 0, i7602 = 0, i7603 = 0, i7604 = 0, i7605 = 0, i7606 = 0, i7607 = 0, i7608 = 0, i7609 = 0;
+        int i7610 = 0, i7611 = 0, i7612 = 0, i7613 = 0, i7614 = 0, i7615 = 0, i7616 = 0, i7617 = 0, i7618 = 0, i7619 = 0;
+        int i7620 = 0, i7621 = 0, i7622 = 0, i7623 = 0, i7624 = 0, i7625 = 0, i7626 = 0, i7627 = 0, i7628 = 0, i7629 = 0;
+        int i7630 = 0, i7631 = 0, i7632 = 0, i7633 = 0, i7634 = 0, i7635 = 0, i7636 = 0, i7637 = 0, i7638 = 0, i7639 = 0;
+        int i7640 = 0, i7641 = 0, i7642 = 0, i7643 = 0, i7644 = 0, i7645 = 0, i7646 = 0, i7647 = 0, i7648 = 0, i7649 = 0;
+        int i7650 = 0, i7651 = 0, i7652 = 0, i7653 = 0, i7654 = 0, i7655 = 0, i7656 = 0, i7657 = 0, i7658 = 0, i7659 = 0;
+        int i7660 = 0, i7661 = 0, i7662 = 0, i7663 = 0, i7664 = 0, i7665 = 0, i7666 = 0, i7667 = 0, i7668 = 0, i7669 = 0;
+        int i7670 = 0, i7671 = 0, i7672 = 0, i7673 = 0, i7674 = 0, i7675 = 0, i7676 = 0, i7677 = 0, i7678 = 0, i7679 = 0;
+        int i7680 = 0, i7681 = 0, i7682 = 0, i7683 = 0, i7684 = 0, i7685 = 0, i7686 = 0, i7687 = 0, i7688 = 0, i7689 = 0;
+        int i7690 = 0, i7691 = 0, i7692 = 0, i7693 = 0, i7694 = 0, i7695 = 0, i7696 = 0, i7697 = 0, i7698 = 0, i7699 = 0;
+        int i7700 = 0, i7701 = 0, i7702 = 0, i7703 = 0, i7704 = 0, i7705 = 0, i7706 = 0, i7707 = 0, i7708 = 0, i7709 = 0;
+        int i7710 = 0, i7711 = 0, i7712 = 0, i7713 = 0, i7714 = 0, i7715 = 0, i7716 = 0, i7717 = 0, i7718 = 0, i7719 = 0;
+        int i7720 = 0, i7721 = 0, i7722 = 0, i7723 = 0, i7724 = 0, i7725 = 0, i7726 = 0, i7727 = 0, i7728 = 0, i7729 = 0;
+        int i7730 = 0, i7731 = 0, i7732 = 0, i7733 = 0, i7734 = 0, i7735 = 0, i7736 = 0, i7737 = 0, i7738 = 0, i7739 = 0;
+        int i7740 = 0, i7741 = 0, i7742 = 0, i7743 = 0, i7744 = 0, i7745 = 0, i7746 = 0, i7747 = 0, i7748 = 0, i7749 = 0;
+        int i7750 = 0, i7751 = 0, i7752 = 0, i7753 = 0, i7754 = 0, i7755 = 0, i7756 = 0, i7757 = 0, i7758 = 0, i7759 = 0;
+        int i7760 = 0, i7761 = 0, i7762 = 0, i7763 = 0, i7764 = 0, i7765 = 0, i7766 = 0, i7767 = 0, i7768 = 0, i7769 = 0;
+        int i7770 = 0, i7771 = 0, i7772 = 0, i7773 = 0, i7774 = 0, i7775 = 0, i7776 = 0, i7777 = 0, i7778 = 0, i7779 = 0;
+        int i7780 = 0, i7781 = 0, i7782 = 0, i7783 = 0, i7784 = 0, i7785 = 0, i7786 = 0, i7787 = 0, i7788 = 0, i7789 = 0;
+        int i7790 = 0, i7791 = 0, i7792 = 0, i7793 = 0, i7794 = 0, i7795 = 0, i7796 = 0, i7797 = 0, i7798 = 0, i7799 = 0;
+        int i7800 = 0, i7801 = 0, i7802 = 0, i7803 = 0, i7804 = 0, i7805 = 0, i7806 = 0, i7807 = 0, i7808 = 0, i7809 = 0;
+        int i7810 = 0, i7811 = 0, i7812 = 0, i7813 = 0, i7814 = 0, i7815 = 0, i7816 = 0, i7817 = 0, i7818 = 0, i7819 = 0;
+        int i7820 = 0, i7821 = 0, i7822 = 0, i7823 = 0, i7824 = 0, i7825 = 0, i7826 = 0, i7827 = 0, i7828 = 0, i7829 = 0;
+        int i7830 = 0, i7831 = 0, i7832 = 0, i7833 = 0, i7834 = 0, i7835 = 0, i7836 = 0, i7837 = 0, i7838 = 0, i7839 = 0;
+        int i7840 = 0, i7841 = 0, i7842 = 0, i7843 = 0, i7844 = 0, i7845 = 0, i7846 = 0, i7847 = 0, i7848 = 0, i7849 = 0;
+        int i7850 = 0, i7851 = 0, i7852 = 0, i7853 = 0, i7854 = 0, i7855 = 0, i7856 = 0, i7857 = 0, i7858 = 0, i7859 = 0;
+        int i7860 = 0, i7861 = 0, i7862 = 0, i7863 = 0, i7864 = 0, i7865 = 0, i7866 = 0, i7867 = 0, i7868 = 0, i7869 = 0;
+        int i7870 = 0, i7871 = 0, i7872 = 0, i7873 = 0, i7874 = 0, i7875 = 0, i7876 = 0, i7877 = 0, i7878 = 0, i7879 = 0;
+        int i7880 = 0, i7881 = 0, i7882 = 0, i7883 = 0, i7884 = 0, i7885 = 0, i7886 = 0, i7887 = 0, i7888 = 0, i7889 = 0;
+        int i7890 = 0, i7891 = 0, i7892 = 0, i7893 = 0, i7894 = 0, i7895 = 0, i7896 = 0, i7897 = 0, i7898 = 0, i7899 = 0;
+        int i7900 = 0, i7901 = 0, i7902 = 0, i7903 = 0, i7904 = 0, i7905 = 0, i7906 = 0, i7907 = 0, i7908 = 0, i7909 = 0;
+        int i7910 = 0, i7911 = 0, i7912 = 0, i7913 = 0, i7914 = 0, i7915 = 0, i7916 = 0, i7917 = 0, i7918 = 0, i7919 = 0;
+        int i7920 = 0, i7921 = 0, i7922 = 0, i7923 = 0, i7924 = 0, i7925 = 0, i7926 = 0, i7927 = 0, i7928 = 0, i7929 = 0;
+        int i7930 = 0, i7931 = 0, i7932 = 0, i7933 = 0, i7934 = 0, i7935 = 0, i7936 = 0, i7937 = 0, i7938 = 0, i7939 = 0;
+        int i7940 = 0, i7941 = 0, i7942 = 0, i7943 = 0, i7944 = 0, i7945 = 0, i7946 = 0, i7947 = 0, i7948 = 0, i7949 = 0;
+        int i7950 = 0, i7951 = 0, i7952 = 0, i7953 = 0, i7954 = 0, i7955 = 0, i7956 = 0, i7957 = 0, i7958 = 0, i7959 = 0;
+        int i7960 = 0, i7961 = 0, i7962 = 0, i7963 = 0, i7964 = 0, i7965 = 0, i7966 = 0, i7967 = 0, i7968 = 0, i7969 = 0;
+        int i7970 = 0, i7971 = 0, i7972 = 0, i7973 = 0, i7974 = 0, i7975 = 0, i7976 = 0, i7977 = 0, i7978 = 0, i7979 = 0;
+        int i7980 = 0, i7981 = 0, i7982 = 0, i7983 = 0, i7984 = 0, i7985 = 0, i7986 = 0, i7987 = 0, i7988 = 0, i7989 = 0;
+        int i7990 = 0, i7991 = 0, i7992 = 0, i7993 = 0, i7994 = 0, i7995 = 0, i7996 = 0, i7997 = 0, i7998 = 0, i7999 = 0;
+        int i8000 = 0, i8001 = 0, i8002 = 0, i8003 = 0, i8004 = 0, i8005 = 0, i8006 = 0, i8007 = 0, i8008 = 0, i8009 = 0;
+        int i8010 = 0, i8011 = 0, i8012 = 0, i8013 = 0, i8014 = 0, i8015 = 0, i8016 = 0, i8017 = 0, i8018 = 0, i8019 = 0;
+        int i8020 = 0, i8021 = 0, i8022 = 0, i8023 = 0, i8024 = 0, i8025 = 0, i8026 = 0, i8027 = 0, i8028 = 0, i8029 = 0;
+        int i8030 = 0, i8031 = 0, i8032 = 0, i8033 = 0, i8034 = 0, i8035 = 0, i8036 = 0, i8037 = 0, i8038 = 0, i8039 = 0;
+        int i8040 = 0, i8041 = 0, i8042 = 0, i8043 = 0, i8044 = 0, i8045 = 0, i8046 = 0, i8047 = 0, i8048 = 0, i8049 = 0;
+        int i8050 = 0, i8051 = 0, i8052 = 0, i8053 = 0, i8054 = 0, i8055 = 0, i8056 = 0, i8057 = 0, i8058 = 0, i8059 = 0;
+        int i8060 = 0, i8061 = 0, i8062 = 0, i8063 = 0, i8064 = 0, i8065 = 0, i8066 = 0, i8067 = 0, i8068 = 0, i8069 = 0;
+        int i8070 = 0, i8071 = 0, i8072 = 0, i8073 = 0, i8074 = 0, i8075 = 0, i8076 = 0, i8077 = 0, i8078 = 0, i8079 = 0;
+        int i8080 = 0, i8081 = 0, i8082 = 0, i8083 = 0, i8084 = 0, i8085 = 0, i8086 = 0, i8087 = 0, i8088 = 0, i8089 = 0;
+        int i8090 = 0, i8091 = 0, i8092 = 0, i8093 = 0, i8094 = 0, i8095 = 0, i8096 = 0, i8097 = 0, i8098 = 0, i8099 = 0;
+        int i8100 = 0, i8101 = 0, i8102 = 0, i8103 = 0, i8104 = 0, i8105 = 0, i8106 = 0, i8107 = 0, i8108 = 0, i8109 = 0;
+        int i8110 = 0, i8111 = 0, i8112 = 0, i8113 = 0, i8114 = 0, i8115 = 0, i8116 = 0, i8117 = 0, i8118 = 0, i8119 = 0;
+        int i8120 = 0, i8121 = 0, i8122 = 0, i8123 = 0, i8124 = 0, i8125 = 0, i8126 = 0, i8127 = 0, i8128 = 0, i8129 = 0;
+        int i8130 = 0, i8131 = 0, i8132 = 0, i8133 = 0, i8134 = 0, i8135 = 0, i8136 = 0, i8137 = 0, i8138 = 0, i8139 = 0;
+        int i8140 = 0, i8141 = 0, i8142 = 0, i8143 = 0, i8144 = 0, i8145 = 0, i8146 = 0, i8147 = 0, i8148 = 0, i8149 = 0;
+        int i8150 = 0, i8151 = 0, i8152 = 0, i8153 = 0, i8154 = 0, i8155 = 0, i8156 = 0, i8157 = 0, i8158 = 0, i8159 = 0;
+        int i8160 = 0, i8161 = 0, i8162 = 0, i8163 = 0, i8164 = 0, i8165 = 0, i8166 = 0, i8167 = 0, i8168 = 0, i8169 = 0;
+        int i8170 = 0, i8171 = 0, i8172 = 0, i8173 = 0, i8174 = 0, i8175 = 0, i8176 = 0, i8177 = 0, i8178 = 0, i8179 = 0;
+        int i8180 = 0, i8181 = 0, i8182 = 0, i8183 = 0, i8184 = 0, i8185 = 0, i8186 = 0, i8187 = 0, i8188 = 0, i8189 = 0;
+        int i8190 = 0, i8191 = 0, i8192 = 0, i8193 = 0, i8194 = 0, i8195 = 0, i8196 = 0, i8197 = 0, i8198 = 0, i8199 = 0;
+        int i8200 = 0, i8201 = 0, i8202 = 0, i8203 = 0, i8204 = 0, i8205 = 0, i8206 = 0, i8207 = 0, i8208 = 0, i8209 = 0;
+        int i8210 = 0, i8211 = 0, i8212 = 0, i8213 = 0, i8214 = 0, i8215 = 0, i8216 = 0, i8217 = 0, i8218 = 0, i8219 = 0;
+        int i8220 = 0, i8221 = 0, i8222 = 0, i8223 = 0, i8224 = 0, i8225 = 0, i8226 = 0, i8227 = 0, i8228 = 0, i8229 = 0;
+        int i8230 = 0, i8231 = 0, i8232 = 0, i8233 = 0, i8234 = 0, i8235 = 0, i8236 = 0, i8237 = 0, i8238 = 0, i8239 = 0;
+        int i8240 = 0, i8241 = 0, i8242 = 0, i8243 = 0, i8244 = 0, i8245 = 0, i8246 = 0, i8247 = 0, i8248 = 0, i8249 = 0;
+        int i8250 = 0, i8251 = 0, i8252 = 0, i8253 = 0, i8254 = 0, i8255 = 0, i8256 = 0, i8257 = 0, i8258 = 0, i8259 = 0;
+        int i8260 = 0, i8261 = 0, i8262 = 0, i8263 = 0, i8264 = 0, i8265 = 0, i8266 = 0, i8267 = 0, i8268 = 0, i8269 = 0;
+        int i8270 = 0, i8271 = 0, i8272 = 0, i8273 = 0, i8274 = 0, i8275 = 0, i8276 = 0, i8277 = 0, i8278 = 0, i8279 = 0;
+        int i8280 = 0, i8281 = 0, i8282 = 0, i8283 = 0, i8284 = 0, i8285 = 0, i8286 = 0, i8287 = 0, i8288 = 0, i8289 = 0;
+        int i8290 = 0, i8291 = 0, i8292 = 0, i8293 = 0, i8294 = 0, i8295 = 0, i8296 = 0, i8297 = 0, i8298 = 0, i8299 = 0;
+        int i8300 = 0, i8301 = 0, i8302 = 0, i8303 = 0, i8304 = 0, i8305 = 0, i8306 = 0, i8307 = 0, i8308 = 0, i8309 = 0;
+        int i8310 = 0, i8311 = 0, i8312 = 0, i8313 = 0, i8314 = 0, i8315 = 0, i8316 = 0, i8317 = 0, i8318 = 0, i8319 = 0;
+        int i8320 = 0, i8321 = 0, i8322 = 0, i8323 = 0, i8324 = 0, i8325 = 0, i8326 = 0, i8327 = 0, i8328 = 0, i8329 = 0;
+        int i8330 = 0, i8331 = 0, i8332 = 0, i8333 = 0, i8334 = 0, i8335 = 0, i8336 = 0, i8337 = 0, i8338 = 0, i8339 = 0;
+        int i8340 = 0, i8341 = 0, i8342 = 0, i8343 = 0, i8344 = 0, i8345 = 0, i8346 = 0, i8347 = 0, i8348 = 0, i8349 = 0;
+        int i8350 = 0, i8351 = 0, i8352 = 0, i8353 = 0, i8354 = 0, i8355 = 0, i8356 = 0, i8357 = 0, i8358 = 0, i8359 = 0;
+        int i8360 = 0, i8361 = 0, i8362 = 0, i8363 = 0, i8364 = 0, i8365 = 0, i8366 = 0, i8367 = 0, i8368 = 0, i8369 = 0;
+        int i8370 = 0, i8371 = 0, i8372 = 0, i8373 = 0, i8374 = 0, i8375 = 0, i8376 = 0, i8377 = 0, i8378 = 0, i8379 = 0;
+        int i8380 = 0, i8381 = 0, i8382 = 0, i8383 = 0, i8384 = 0, i8385 = 0, i8386 = 0, i8387 = 0, i8388 = 0, i8389 = 0;
+        int i8390 = 0, i8391 = 0, i8392 = 0, i8393 = 0, i8394 = 0, i8395 = 0, i8396 = 0, i8397 = 0, i8398 = 0, i8399 = 0;
+        int i8400 = 0, i8401 = 0, i8402 = 0, i8403 = 0, i8404 = 0, i8405 = 0, i8406 = 0, i8407 = 0, i8408 = 0, i8409 = 0;
+        int i8410 = 0, i8411 = 0, i8412 = 0, i8413 = 0, i8414 = 0, i8415 = 0, i8416 = 0, i8417 = 0, i8418 = 0, i8419 = 0;
+        int i8420 = 0, i8421 = 0, i8422 = 0, i8423 = 0, i8424 = 0, i8425 = 0, i8426 = 0, i8427 = 0, i8428 = 0, i8429 = 0;
+        int i8430 = 0, i8431 = 0, i8432 = 0, i8433 = 0, i8434 = 0, i8435 = 0, i8436 = 0, i8437 = 0, i8438 = 0, i8439 = 0;
+        int i8440 = 0, i8441 = 0, i8442 = 0, i8443 = 0, i8444 = 0, i8445 = 0, i8446 = 0, i8447 = 0, i8448 = 0, i8449 = 0;
+        int i8450 = 0, i8451 = 0, i8452 = 0, i8453 = 0, i8454 = 0, i8455 = 0, i8456 = 0, i8457 = 0, i8458 = 0, i8459 = 0;
+        int i8460 = 0, i8461 = 0, i8462 = 0, i8463 = 0, i8464 = 0, i8465 = 0, i8466 = 0, i8467 = 0, i8468 = 0, i8469 = 0;
+        int i8470 = 0, i8471 = 0, i8472 = 0, i8473 = 0, i8474 = 0, i8475 = 0, i8476 = 0, i8477 = 0, i8478 = 0, i8479 = 0;
+        int i8480 = 0, i8481 = 0, i8482 = 0, i8483 = 0, i8484 = 0, i8485 = 0, i8486 = 0, i8487 = 0, i8488 = 0, i8489 = 0;
+        int i8490 = 0, i8491 = 0, i8492 = 0, i8493 = 0, i8494 = 0, i8495 = 0, i8496 = 0, i8497 = 0, i8498 = 0, i8499 = 0;
+        int i8500 = 0, i8501 = 0, i8502 = 0, i8503 = 0, i8504 = 0, i8505 = 0, i8506 = 0, i8507 = 0, i8508 = 0, i8509 = 0;
+        int i8510 = 0, i8511 = 0, i8512 = 0, i8513 = 0, i8514 = 0, i8515 = 0, i8516 = 0, i8517 = 0, i8518 = 0, i8519 = 0;
+        int i8520 = 0, i8521 = 0, i8522 = 0, i8523 = 0, i8524 = 0, i8525 = 0, i8526 = 0, i8527 = 0, i8528 = 0, i8529 = 0;
+        int i8530 = 0, i8531 = 0, i8532 = 0, i8533 = 0, i8534 = 0, i8535 = 0, i8536 = 0, i8537 = 0, i8538 = 0, i8539 = 0;
+        int i8540 = 0, i8541 = 0, i8542 = 0, i8543 = 0, i8544 = 0, i8545 = 0, i8546 = 0, i8547 = 0, i8548 = 0, i8549 = 0;
+        int i8550 = 0, i8551 = 0, i8552 = 0, i8553 = 0, i8554 = 0, i8555 = 0, i8556 = 0, i8557 = 0, i8558 = 0, i8559 = 0;
+        int i8560 = 0, i8561 = 0, i8562 = 0, i8563 = 0, i8564 = 0, i8565 = 0, i8566 = 0, i8567 = 0, i8568 = 0, i8569 = 0;
+        int i8570 = 0, i8571 = 0, i8572 = 0, i8573 = 0, i8574 = 0, i8575 = 0, i8576 = 0, i8577 = 0, i8578 = 0, i8579 = 0;
+        int i8580 = 0, i8581 = 0, i8582 = 0, i8583 = 0, i8584 = 0, i8585 = 0, i8586 = 0, i8587 = 0, i8588 = 0, i8589 = 0;
+        int i8590 = 0, i8591 = 0, i8592 = 0, i8593 = 0, i8594 = 0, i8595 = 0, i8596 = 0, i8597 = 0, i8598 = 0, i8599 = 0;
+        int i8600 = 0, i8601 = 0, i8602 = 0, i8603 = 0, i8604 = 0, i8605 = 0, i8606 = 0, i8607 = 0, i8608 = 0, i8609 = 0;
+        int i8610 = 0, i8611 = 0, i8612 = 0, i8613 = 0, i8614 = 0, i8615 = 0, i8616 = 0, i8617 = 0, i8618 = 0, i8619 = 0;
+        int i8620 = 0, i8621 = 0, i8622 = 0, i8623 = 0, i8624 = 0, i8625 = 0, i8626 = 0, i8627 = 0, i8628 = 0, i8629 = 0;
+        int i8630 = 0, i8631 = 0, i8632 = 0, i8633 = 0, i8634 = 0, i8635 = 0, i8636 = 0, i8637 = 0, i8638 = 0, i8639 = 0;
+        int i8640 = 0, i8641 = 0, i8642 = 0, i8643 = 0, i8644 = 0, i8645 = 0, i8646 = 0, i8647 = 0, i8648 = 0, i8649 = 0;
+        int i8650 = 0, i8651 = 0, i8652 = 0, i8653 = 0, i8654 = 0, i8655 = 0, i8656 = 0, i8657 = 0, i8658 = 0, i8659 = 0;
+        int i8660 = 0, i8661 = 0, i8662 = 0, i8663 = 0, i8664 = 0, i8665 = 0, i8666 = 0, i8667 = 0, i8668 = 0, i8669 = 0;
+        int i8670 = 0, i8671 = 0, i8672 = 0, i8673 = 0, i8674 = 0, i8675 = 0, i8676 = 0, i8677 = 0, i8678 = 0, i8679 = 0;
+        int i8680 = 0, i8681 = 0, i8682 = 0, i8683 = 0, i8684 = 0, i8685 = 0, i8686 = 0, i8687 = 0, i8688 = 0, i8689 = 0;
+        int i8690 = 0, i8691 = 0, i8692 = 0, i8693 = 0, i8694 = 0, i8695 = 0, i8696 = 0, i8697 = 0, i8698 = 0, i8699 = 0;
+        int i8700 = 0, i8701 = 0, i8702 = 0, i8703 = 0, i8704 = 0, i8705 = 0, i8706 = 0, i8707 = 0, i8708 = 0, i8709 = 0;
+        int i8710 = 0, i8711 = 0, i8712 = 0, i8713 = 0, i8714 = 0, i8715 = 0, i8716 = 0, i8717 = 0, i8718 = 0, i8719 = 0;
+        int i8720 = 0, i8721 = 0, i8722 = 0, i8723 = 0, i8724 = 0, i8725 = 0, i8726 = 0, i8727 = 0, i8728 = 0, i8729 = 0;
+        int i8730 = 0, i8731 = 0, i8732 = 0, i8733 = 0, i8734 = 0, i8735 = 0, i8736 = 0, i8737 = 0, i8738 = 0, i8739 = 0;
+        int i8740 = 0, i8741 = 0, i8742 = 0, i8743 = 0, i8744 = 0, i8745 = 0, i8746 = 0, i8747 = 0, i8748 = 0, i8749 = 0;
+        int i8750 = 0, i8751 = 0, i8752 = 0, i8753 = 0, i8754 = 0, i8755 = 0, i8756 = 0, i8757 = 0, i8758 = 0, i8759 = 0;
+        int i8760 = 0, i8761 = 0, i8762 = 0, i8763 = 0, i8764 = 0, i8765 = 0, i8766 = 0, i8767 = 0, i8768 = 0, i8769 = 0;
+        int i8770 = 0, i8771 = 0, i8772 = 0, i8773 = 0, i8774 = 0, i8775 = 0, i8776 = 0, i8777 = 0, i8778 = 0, i8779 = 0;
+        int i8780 = 0, i8781 = 0, i8782 = 0, i8783 = 0, i8784 = 0, i8785 = 0, i8786 = 0, i8787 = 0, i8788 = 0, i8789 = 0;
+        int i8790 = 0, i8791 = 0, i8792 = 0, i8793 = 0, i8794 = 0, i8795 = 0, i8796 = 0, i8797 = 0, i8798 = 0, i8799 = 0;
+        int i8800 = 0, i8801 = 0, i8802 = 0, i8803 = 0, i8804 = 0, i8805 = 0, i8806 = 0, i8807 = 0, i8808 = 0, i8809 = 0;
+        int i8810 = 0, i8811 = 0, i8812 = 0, i8813 = 0, i8814 = 0, i8815 = 0, i8816 = 0, i8817 = 0, i8818 = 0, i8819 = 0;
+        int i8820 = 0, i8821 = 0, i8822 = 0, i8823 = 0, i8824 = 0, i8825 = 0, i8826 = 0, i8827 = 0, i8828 = 0, i8829 = 0;
+        int i8830 = 0, i8831 = 0, i8832 = 0, i8833 = 0, i8834 = 0, i8835 = 0, i8836 = 0, i8837 = 0, i8838 = 0, i8839 = 0;
+        int i8840 = 0, i8841 = 0, i8842 = 0, i8843 = 0, i8844 = 0, i8845 = 0, i8846 = 0, i8847 = 0, i8848 = 0, i8849 = 0;
+        int i8850 = 0, i8851 = 0, i8852 = 0, i8853 = 0, i8854 = 0, i8855 = 0, i8856 = 0, i8857 = 0, i8858 = 0, i8859 = 0;
+        int i8860 = 0, i8861 = 0, i8862 = 0, i8863 = 0, i8864 = 0, i8865 = 0, i8866 = 0, i8867 = 0, i8868 = 0, i8869 = 0;
+        int i8870 = 0, i8871 = 0, i8872 = 0, i8873 = 0, i8874 = 0, i8875 = 0, i8876 = 0, i8877 = 0, i8878 = 0, i8879 = 0;
+        int i8880 = 0, i8881 = 0, i8882 = 0, i8883 = 0, i8884 = 0, i8885 = 0, i8886 = 0, i8887 = 0, i8888 = 0, i8889 = 0;
+        int i8890 = 0, i8891 = 0, i8892 = 0, i8893 = 0, i8894 = 0, i8895 = 0, i8896 = 0, i8897 = 0, i8898 = 0, i8899 = 0;
+        int i8900 = 0, i8901 = 0, i8902 = 0, i8903 = 0, i8904 = 0, i8905 = 0, i8906 = 0, i8907 = 0, i8908 = 0, i8909 = 0;
+        int i8910 = 0, i8911 = 0, i8912 = 0, i8913 = 0, i8914 = 0, i8915 = 0, i8916 = 0, i8917 = 0, i8918 = 0, i8919 = 0;
+        int i8920 = 0, i8921 = 0, i8922 = 0, i8923 = 0, i8924 = 0, i8925 = 0, i8926 = 0, i8927 = 0, i8928 = 0, i8929 = 0;
+        int i8930 = 0, i8931 = 0, i8932 = 0, i8933 = 0, i8934 = 0, i8935 = 0, i8936 = 0, i8937 = 0, i8938 = 0, i8939 = 0;
+        int i8940 = 0, i8941 = 0, i8942 = 0, i8943 = 0, i8944 = 0, i8945 = 0, i8946 = 0, i8947 = 0, i8948 = 0, i8949 = 0;
+        int i8950 = 0, i8951 = 0, i8952 = 0, i8953 = 0, i8954 = 0, i8955 = 0, i8956 = 0, i8957 = 0, i8958 = 0, i8959 = 0;
+        int i8960 = 0, i8961 = 0, i8962 = 0, i8963 = 0, i8964 = 0, i8965 = 0, i8966 = 0, i8967 = 0, i8968 = 0, i8969 = 0;
+        int i8970 = 0, i8971 = 0, i8972 = 0, i8973 = 0, i8974 = 0, i8975 = 0, i8976 = 0, i8977 = 0, i8978 = 0, i8979 = 0;
+        int i8980 = 0, i8981 = 0, i8982 = 0, i8983 = 0, i8984 = 0, i8985 = 0, i8986 = 0, i8987 = 0, i8988 = 0, i8989 = 0;
+        int i8990 = 0, i8991 = 0, i8992 = 0, i8993 = 0, i8994 = 0, i8995 = 0, i8996 = 0, i8997 = 0, i8998 = 0, i8999 = 0;
+        int i9000 = 0, i9001 = 0, i9002 = 0, i9003 = 0, i9004 = 0, i9005 = 0, i9006 = 0, i9007 = 0, i9008 = 0, i9009 = 0;
+        int i9010 = 0, i9011 = 0, i9012 = 0, i9013 = 0, i9014 = 0, i9015 = 0, i9016 = 0, i9017 = 0, i9018 = 0, i9019 = 0;
+        int i9020 = 0, i9021 = 0, i9022 = 0, i9023 = 0, i9024 = 0, i9025 = 0, i9026 = 0, i9027 = 0, i9028 = 0, i9029 = 0;
+        int i9030 = 0, i9031 = 0, i9032 = 0, i9033 = 0, i9034 = 0, i9035 = 0, i9036 = 0, i9037 = 0, i9038 = 0, i9039 = 0;
+        int i9040 = 0, i9041 = 0, i9042 = 0, i9043 = 0, i9044 = 0, i9045 = 0, i9046 = 0, i9047 = 0, i9048 = 0, i9049 = 0;
+        int i9050 = 0, i9051 = 0, i9052 = 0, i9053 = 0, i9054 = 0, i9055 = 0, i9056 = 0, i9057 = 0, i9058 = 0, i9059 = 0;
+        int i9060 = 0, i9061 = 0, i9062 = 0, i9063 = 0, i9064 = 0, i9065 = 0, i9066 = 0, i9067 = 0, i9068 = 0, i9069 = 0;
+        int i9070 = 0, i9071 = 0, i9072 = 0, i9073 = 0, i9074 = 0, i9075 = 0, i9076 = 0, i9077 = 0, i9078 = 0, i9079 = 0;
+        int i9080 = 0, i9081 = 0, i9082 = 0, i9083 = 0, i9084 = 0, i9085 = 0, i9086 = 0, i9087 = 0, i9088 = 0, i9089 = 0;
+        int i9090 = 0, i9091 = 0, i9092 = 0, i9093 = 0, i9094 = 0, i9095 = 0, i9096 = 0, i9097 = 0, i9098 = 0, i9099 = 0;
+        int i9100 = 0, i9101 = 0, i9102 = 0, i9103 = 0, i9104 = 0, i9105 = 0, i9106 = 0, i9107 = 0, i9108 = 0, i9109 = 0;
+        int i9110 = 0, i9111 = 0, i9112 = 0, i9113 = 0, i9114 = 0, i9115 = 0, i9116 = 0, i9117 = 0, i9118 = 0, i9119 = 0;
+        int i9120 = 0, i9121 = 0, i9122 = 0, i9123 = 0, i9124 = 0, i9125 = 0, i9126 = 0, i9127 = 0, i9128 = 0, i9129 = 0;
+        int i9130 = 0, i9131 = 0, i9132 = 0, i9133 = 0, i9134 = 0, i9135 = 0, i9136 = 0, i9137 = 0, i9138 = 0, i9139 = 0;
+        int i9140 = 0, i9141 = 0, i9142 = 0, i9143 = 0, i9144 = 0, i9145 = 0, i9146 = 0, i9147 = 0, i9148 = 0, i9149 = 0;
+        int i9150 = 0, i9151 = 0, i9152 = 0, i9153 = 0, i9154 = 0, i9155 = 0, i9156 = 0, i9157 = 0, i9158 = 0, i9159 = 0;
+        int i9160 = 0, i9161 = 0, i9162 = 0, i9163 = 0, i9164 = 0, i9165 = 0, i9166 = 0, i9167 = 0, i9168 = 0, i9169 = 0;
+        int i9170 = 0, i9171 = 0, i9172 = 0, i9173 = 0, i9174 = 0, i9175 = 0, i9176 = 0, i9177 = 0, i9178 = 0, i9179 = 0;
+        int i9180 = 0, i9181 = 0, i9182 = 0, i9183 = 0, i9184 = 0, i9185 = 0, i9186 = 0, i9187 = 0, i9188 = 0, i9189 = 0;
+        int i9190 = 0, i9191 = 0, i9192 = 0, i9193 = 0, i9194 = 0, i9195 = 0, i9196 = 0, i9197 = 0, i9198 = 0, i9199 = 0;
+        int i9200 = 0, i9201 = 0, i9202 = 0, i9203 = 0, i9204 = 0, i9205 = 0, i9206 = 0, i9207 = 0, i9208 = 0, i9209 = 0;
+        int i9210 = 0, i9211 = 0, i9212 = 0, i9213 = 0, i9214 = 0, i9215 = 0, i9216 = 0, i9217 = 0, i9218 = 0, i9219 = 0;
+        int i9220 = 0, i9221 = 0, i9222 = 0, i9223 = 0, i9224 = 0, i9225 = 0, i9226 = 0, i9227 = 0, i9228 = 0, i9229 = 0;
+        int i9230 = 0, i9231 = 0, i9232 = 0, i9233 = 0, i9234 = 0, i9235 = 0, i9236 = 0, i9237 = 0, i9238 = 0, i9239 = 0;
+        int i9240 = 0, i9241 = 0, i9242 = 0, i9243 = 0, i9244 = 0, i9245 = 0, i9246 = 0, i9247 = 0, i9248 = 0, i9249 = 0;
+        int i9250 = 0, i9251 = 0, i9252 = 0, i9253 = 0, i9254 = 0, i9255 = 0, i9256 = 0, i9257 = 0, i9258 = 0, i9259 = 0;
+        int i9260 = 0, i9261 = 0, i9262 = 0, i9263 = 0, i9264 = 0, i9265 = 0, i9266 = 0, i9267 = 0, i9268 = 0, i9269 = 0;
+        int i9270 = 0, i9271 = 0, i9272 = 0, i9273 = 0, i9274 = 0, i9275 = 0, i9276 = 0, i9277 = 0, i9278 = 0, i9279 = 0;
+        int i9280 = 0, i9281 = 0, i9282 = 0, i9283 = 0, i9284 = 0, i9285 = 0, i9286 = 0, i9287 = 0, i9288 = 0, i9289 = 0;
+        int i9290 = 0, i9291 = 0, i9292 = 0, i9293 = 0, i9294 = 0, i9295 = 0, i9296 = 0, i9297 = 0, i9298 = 0, i9299 = 0;
+        int i9300 = 0, i9301 = 0, i9302 = 0, i9303 = 0, i9304 = 0, i9305 = 0, i9306 = 0, i9307 = 0, i9308 = 0, i9309 = 0;
+        int i9310 = 0, i9311 = 0, i9312 = 0, i9313 = 0, i9314 = 0, i9315 = 0, i9316 = 0, i9317 = 0, i9318 = 0, i9319 = 0;
+        int i9320 = 0, i9321 = 0, i9322 = 0, i9323 = 0, i9324 = 0, i9325 = 0, i9326 = 0, i9327 = 0, i9328 = 0, i9329 = 0;
+        int i9330 = 0, i9331 = 0, i9332 = 0, i9333 = 0, i9334 = 0, i9335 = 0, i9336 = 0, i9337 = 0, i9338 = 0, i9339 = 0;
+        int i9340 = 0, i9341 = 0, i9342 = 0, i9343 = 0, i9344 = 0, i9345 = 0, i9346 = 0, i9347 = 0, i9348 = 0, i9349 = 0;
+        int i9350 = 0, i9351 = 0, i9352 = 0, i9353 = 0, i9354 = 0, i9355 = 0, i9356 = 0, i9357 = 0, i9358 = 0, i9359 = 0;
+        int i9360 = 0, i9361 = 0, i9362 = 0, i9363 = 0, i9364 = 0, i9365 = 0, i9366 = 0, i9367 = 0, i9368 = 0, i9369 = 0;
+        int i9370 = 0, i9371 = 0, i9372 = 0, i9373 = 0, i9374 = 0, i9375 = 0, i9376 = 0, i9377 = 0, i9378 = 0, i9379 = 0;
+        int i9380 = 0, i9381 = 0, i9382 = 0, i9383 = 0, i9384 = 0, i9385 = 0, i9386 = 0, i9387 = 0, i9388 = 0, i9389 = 0;
+        int i9390 = 0, i9391 = 0, i9392 = 0, i9393 = 0, i9394 = 0, i9395 = 0, i9396 = 0, i9397 = 0, i9398 = 0, i9399 = 0;
+        int i9400 = 0, i9401 = 0, i9402 = 0, i9403 = 0, i9404 = 0, i9405 = 0, i9406 = 0, i9407 = 0, i9408 = 0, i9409 = 0;
+        int i9410 = 0, i9411 = 0, i9412 = 0, i9413 = 0, i9414 = 0, i9415 = 0, i9416 = 0, i9417 = 0, i9418 = 0, i9419 = 0;
+        int i9420 = 0, i9421 = 0, i9422 = 0, i9423 = 0, i9424 = 0, i9425 = 0, i9426 = 0, i9427 = 0, i9428 = 0, i9429 = 0;
+        int i9430 = 0, i9431 = 0, i9432 = 0, i9433 = 0, i9434 = 0, i9435 = 0, i9436 = 0, i9437 = 0, i9438 = 0, i9439 = 0;
+        int i9440 = 0, i9441 = 0, i9442 = 0, i9443 = 0, i9444 = 0, i9445 = 0, i9446 = 0, i9447 = 0, i9448 = 0, i9449 = 0;
+        int i9450 = 0, i9451 = 0, i9452 = 0, i9453 = 0, i9454 = 0, i9455 = 0, i9456 = 0, i9457 = 0, i9458 = 0, i9459 = 0;
+        int i9460 = 0, i9461 = 0, i9462 = 0, i9463 = 0, i9464 = 0, i9465 = 0, i9466 = 0, i9467 = 0, i9468 = 0, i9469 = 0;
+        int i9470 = 0, i9471 = 0, i9472 = 0, i9473 = 0, i9474 = 0, i9475 = 0, i9476 = 0, i9477 = 0, i9478 = 0, i9479 = 0;
+        int i9480 = 0, i9481 = 0, i9482 = 0, i9483 = 0, i9484 = 0, i9485 = 0, i9486 = 0, i9487 = 0, i9488 = 0, i9489 = 0;
+        int i9490 = 0, i9491 = 0, i9492 = 0, i9493 = 0, i9494 = 0, i9495 = 0, i9496 = 0, i9497 = 0, i9498 = 0, i9499 = 0;
+        int i9500 = 0, i9501 = 0, i9502 = 0, i9503 = 0, i9504 = 0, i9505 = 0, i9506 = 0, i9507 = 0, i9508 = 0, i9509 = 0;
+        int i9510 = 0, i9511 = 0, i9512 = 0, i9513 = 0, i9514 = 0, i9515 = 0, i9516 = 0, i9517 = 0, i9518 = 0, i9519 = 0;
+        int i9520 = 0, i9521 = 0, i9522 = 0, i9523 = 0, i9524 = 0, i9525 = 0, i9526 = 0, i9527 = 0, i9528 = 0, i9529 = 0;
+        int i9530 = 0, i9531 = 0, i9532 = 0, i9533 = 0, i9534 = 0, i9535 = 0, i9536 = 0, i9537 = 0, i9538 = 0, i9539 = 0;
+        int i9540 = 0, i9541 = 0, i9542 = 0, i9543 = 0, i9544 = 0, i9545 = 0, i9546 = 0, i9547 = 0, i9548 = 0, i9549 = 0;
+        int i9550 = 0, i9551 = 0, i9552 = 0, i9553 = 0, i9554 = 0, i9555 = 0, i9556 = 0, i9557 = 0, i9558 = 0, i9559 = 0;
+        int i9560 = 0, i9561 = 0, i9562 = 0, i9563 = 0, i9564 = 0, i9565 = 0, i9566 = 0, i9567 = 0, i9568 = 0, i9569 = 0;
+        int i9570 = 0, i9571 = 0, i9572 = 0, i9573 = 0, i9574 = 0, i9575 = 0, i9576 = 0, i9577 = 0, i9578 = 0, i9579 = 0;
+        int i9580 = 0, i9581 = 0, i9582 = 0, i9583 = 0, i9584 = 0, i9585 = 0, i9586 = 0, i9587 = 0, i9588 = 0, i9589 = 0;
+        int i9590 = 0, i9591 = 0, i9592 = 0, i9593 = 0, i9594 = 0, i9595 = 0, i9596 = 0, i9597 = 0, i9598 = 0, i9599 = 0;
+        int i9600 = 0, i9601 = 0, i9602 = 0, i9603 = 0, i9604 = 0, i9605 = 0, i9606 = 0, i9607 = 0, i9608 = 0, i9609 = 0;
+        int i9610 = 0, i9611 = 0, i9612 = 0, i9613 = 0, i9614 = 0, i9615 = 0, i9616 = 0, i9617 = 0, i9618 = 0, i9619 = 0;
+        int i9620 = 0, i9621 = 0, i9622 = 0, i9623 = 0, i9624 = 0, i9625 = 0, i9626 = 0, i9627 = 0, i9628 = 0, i9629 = 0;
+        int i9630 = 0, i9631 = 0, i9632 = 0, i9633 = 0, i9634 = 0, i9635 = 0, i9636 = 0, i9637 = 0, i9638 = 0, i9639 = 0;
+        int i9640 = 0, i9641 = 0, i9642 = 0, i9643 = 0, i9644 = 0, i9645 = 0, i9646 = 0, i9647 = 0, i9648 = 0, i9649 = 0;
+        int i9650 = 0, i9651 = 0, i9652 = 0, i9653 = 0, i9654 = 0, i9655 = 0, i9656 = 0, i9657 = 0, i9658 = 0, i9659 = 0;
+        int i9660 = 0, i9661 = 0, i9662 = 0, i9663 = 0, i9664 = 0, i9665 = 0, i9666 = 0, i9667 = 0, i9668 = 0, i9669 = 0;
+        int i9670 = 0, i9671 = 0, i9672 = 0, i9673 = 0, i9674 = 0, i9675 = 0, i9676 = 0, i9677 = 0, i9678 = 0, i9679 = 0;
+        int i9680 = 0, i9681 = 0, i9682 = 0, i9683 = 0, i9684 = 0, i9685 = 0, i9686 = 0, i9687 = 0, i9688 = 0, i9689 = 0;
+        int i9690 = 0, i9691 = 0, i9692 = 0, i9693 = 0, i9694 = 0, i9695 = 0, i9696 = 0, i9697 = 0, i9698 = 0, i9699 = 0;
+        int i9700 = 0, i9701 = 0, i9702 = 0, i9703 = 0, i9704 = 0, i9705 = 0, i9706 = 0, i9707 = 0, i9708 = 0, i9709 = 0;
+        int i9710 = 0, i9711 = 0, i9712 = 0, i9713 = 0, i9714 = 0, i9715 = 0, i9716 = 0, i9717 = 0, i9718 = 0, i9719 = 0;
+        int i9720 = 0, i9721 = 0, i9722 = 0, i9723 = 0, i9724 = 0, i9725 = 0, i9726 = 0, i9727 = 0, i9728 = 0, i9729 = 0;
+        int i9730 = 0, i9731 = 0, i9732 = 0, i9733 = 0, i9734 = 0, i9735 = 0, i9736 = 0, i9737 = 0, i9738 = 0, i9739 = 0;
+        int i9740 = 0, i9741 = 0, i9742 = 0, i9743 = 0, i9744 = 0, i9745 = 0, i9746 = 0, i9747 = 0, i9748 = 0, i9749 = 0;
+        int i9750 = 0, i9751 = 0, i9752 = 0, i9753 = 0, i9754 = 0, i9755 = 0, i9756 = 0, i9757 = 0, i9758 = 0, i9759 = 0;
+        int i9760 = 0, i9761 = 0, i9762 = 0, i9763 = 0, i9764 = 0, i9765 = 0, i9766 = 0, i9767 = 0, i9768 = 0, i9769 = 0;
+        int i9770 = 0, i9771 = 0, i9772 = 0, i9773 = 0, i9774 = 0, i9775 = 0, i9776 = 0, i9777 = 0, i9778 = 0, i9779 = 0;
+        int i9780 = 0, i9781 = 0, i9782 = 0, i9783 = 0, i9784 = 0, i9785 = 0, i9786 = 0, i9787 = 0, i9788 = 0, i9789 = 0;
+        int i9790 = 0, i9791 = 0, i9792 = 0, i9793 = 0, i9794 = 0, i9795 = 0, i9796 = 0, i9797 = 0, i9798 = 0, i9799 = 0;
+        int i9800 = 0, i9801 = 0, i9802 = 0, i9803 = 0, i9804 = 0, i9805 = 0, i9806 = 0, i9807 = 0, i9808 = 0, i9809 = 0;
+        int i9810 = 0, i9811 = 0, i9812 = 0, i9813 = 0, i9814 = 0, i9815 = 0, i9816 = 0, i9817 = 0, i9818 = 0, i9819 = 0;
+        int i9820 = 0, i9821 = 0, i9822 = 0, i9823 = 0, i9824 = 0, i9825 = 0, i9826 = 0, i9827 = 0, i9828 = 0, i9829 = 0;
+        int i9830 = 0, i9831 = 0, i9832 = 0, i9833 = 0, i9834 = 0, i9835 = 0, i9836 = 0, i9837 = 0, i9838 = 0, i9839 = 0;
+        int i9840 = 0, i9841 = 0, i9842 = 0, i9843 = 0, i9844 = 0, i9845 = 0, i9846 = 0, i9847 = 0, i9848 = 0, i9849 = 0;
+        int i9850 = 0, i9851 = 0, i9852 = 0, i9853 = 0, i9854 = 0, i9855 = 0, i9856 = 0, i9857 = 0, i9858 = 0, i9859 = 0;
+        int i9860 = 0, i9861 = 0, i9862 = 0, i9863 = 0, i9864 = 0, i9865 = 0, i9866 = 0, i9867 = 0, i9868 = 0, i9869 = 0;
+        int i9870 = 0, i9871 = 0, i9872 = 0, i9873 = 0, i9874 = 0, i9875 = 0, i9876 = 0, i9877 = 0, i9878 = 0, i9879 = 0;
+        int i9880 = 0, i9881 = 0, i9882 = 0, i9883 = 0, i9884 = 0, i9885 = 0, i9886 = 0, i9887 = 0, i9888 = 0, i9889 = 0;
+        int i9890 = 0, i9891 = 0, i9892 = 0, i9893 = 0, i9894 = 0, i9895 = 0, i9896 = 0, i9897 = 0, i9898 = 0, i9899 = 0;
+        int i9900 = 0, i9901 = 0, i9902 = 0, i9903 = 0, i9904 = 0, i9905 = 0, i9906 = 0, i9907 = 0, i9908 = 0, i9909 = 0;
+        int i9910 = 0, i9911 = 0, i9912 = 0, i9913 = 0, i9914 = 0, i9915 = 0, i9916 = 0, i9917 = 0, i9918 = 0, i9919 = 0;
+        int i9920 = 0, i9921 = 0, i9922 = 0, i9923 = 0, i9924 = 0, i9925 = 0, i9926 = 0, i9927 = 0, i9928 = 0, i9929 = 0;
+        int i9930 = 0, i9931 = 0, i9932 = 0, i9933 = 0, i9934 = 0, i9935 = 0, i9936 = 0, i9937 = 0, i9938 = 0, i9939 = 0;
+        int i9940 = 0, i9941 = 0, i9942 = 0, i9943 = 0, i9944 = 0, i9945 = 0, i9946 = 0, i9947 = 0, i9948 = 0, i9949 = 0;
+        int i9950 = 0, i9951 = 0, i9952 = 0, i9953 = 0, i9954 = 0, i9955 = 0, i9956 = 0, i9957 = 0, i9958 = 0, i9959 = 0;
+        int i9960 = 0, i9961 = 0, i9962 = 0, i9963 = 0, i9964 = 0, i9965 = 0, i9966 = 0, i9967 = 0, i9968 = 0, i9969 = 0;
+        int i9970 = 0, i9971 = 0, i9972 = 0, i9973 = 0, i9974 = 0, i9975 = 0, i9976 = 0, i9977 = 0, i9978 = 0, i9979 = 0;
+        int i9980 = 0, i9981 = 0, i9982 = 0, i9983 = 0, i9984 = 0, i9985 = 0, i9986 = 0, i9987 = 0, i9988 = 0, i9989 = 0;
+        int i9990 = 0, i9991 = 0, i9992 = 0, i9993 = 0, i9994 = 0, i9995 = 0, i9996 = 0, i9997 = 0, i9998 = 0, i9999 = 0;
+        int i10000 = 0, i10001 = 0, i10002 = 0, i10003 = 0, i10004 = 0, i10005 = 0, i10006 = 0, i10007 = 0, i10008 = 0, i10009 = 0;
+        int i10010 = 0, i10011 = 0, i10012 = 0, i10013 = 0, i10014 = 0, i10015 = 0, i10016 = 0, i10017 = 0, i10018 = 0, i10019 = 0;
+        int i10020 = 0, i10021 = 0, i10022 = 0, i10023 = 0, i10024 = 0, i10025 = 0, i10026 = 0, i10027 = 0, i10028 = 0, i10029 = 0;
+        int i10030 = 0, i10031 = 0, i10032 = 0, i10033 = 0, i10034 = 0, i10035 = 0, i10036 = 0, i10037 = 0, i10038 = 0, i10039 = 0;
+        int i10040 = 0, i10041 = 0, i10042 = 0, i10043 = 0, i10044 = 0, i10045 = 0, i10046 = 0, i10047 = 0, i10048 = 0, i10049 = 0;
+        int i10050 = 0, i10051 = 0, i10052 = 0, i10053 = 0, i10054 = 0, i10055 = 0, i10056 = 0, i10057 = 0, i10058 = 0, i10059 = 0;
+        int i10060 = 0, i10061 = 0, i10062 = 0, i10063 = 0, i10064 = 0, i10065 = 0, i10066 = 0, i10067 = 0, i10068 = 0, i10069 = 0;
+        int i10070 = 0, i10071 = 0, i10072 = 0, i10073 = 0, i10074 = 0, i10075 = 0, i10076 = 0, i10077 = 0, i10078 = 0, i10079 = 0;
+        int i10080 = 0, i10081 = 0, i10082 = 0, i10083 = 0, i10084 = 0, i10085 = 0, i10086 = 0, i10087 = 0, i10088 = 0, i10089 = 0;
+        int i10090 = 0, i10091 = 0, i10092 = 0, i10093 = 0, i10094 = 0, i10095 = 0, i10096 = 0, i10097 = 0, i10098 = 0, i10099 = 0;
+        int i10100 = 0, i10101 = 0, i10102 = 0, i10103 = 0, i10104 = 0, i10105 = 0, i10106 = 0, i10107 = 0, i10108 = 0, i10109 = 0;
+        int i10110 = 0, i10111 = 0, i10112 = 0, i10113 = 0, i10114 = 0, i10115 = 0, i10116 = 0, i10117 = 0, i10118 = 0, i10119 = 0;
+        int i10120 = 0, i10121 = 0, i10122 = 0, i10123 = 0, i10124 = 0, i10125 = 0, i10126 = 0, i10127 = 0, i10128 = 0, i10129 = 0;
+        int i10130 = 0, i10131 = 0, i10132 = 0, i10133 = 0, i10134 = 0, i10135 = 0, i10136 = 0, i10137 = 0, i10138 = 0, i10139 = 0;
+        int i10140 = 0, i10141 = 0, i10142 = 0, i10143 = 0, i10144 = 0, i10145 = 0, i10146 = 0, i10147 = 0, i10148 = 0, i10149 = 0;
+        int i10150 = 0, i10151 = 0, i10152 = 0, i10153 = 0, i10154 = 0, i10155 = 0, i10156 = 0, i10157 = 0, i10158 = 0, i10159 = 0;
+        int i10160 = 0, i10161 = 0, i10162 = 0, i10163 = 0, i10164 = 0, i10165 = 0, i10166 = 0, i10167 = 0, i10168 = 0, i10169 = 0;
+        int i10170 = 0, i10171 = 0, i10172 = 0, i10173 = 0, i10174 = 0, i10175 = 0, i10176 = 0, i10177 = 0, i10178 = 0, i10179 = 0;
+        int i10180 = 0, i10181 = 0, i10182 = 0, i10183 = 0, i10184 = 0, i10185 = 0, i10186 = 0, i10187 = 0, i10188 = 0, i10189 = 0;
+        int i10190 = 0, i10191 = 0, i10192 = 0, i10193 = 0, i10194 = 0, i10195 = 0, i10196 = 0, i10197 = 0, i10198 = 0, i10199 = 0;
+        int i10200 = 0, i10201 = 0, i10202 = 0, i10203 = 0, i10204 = 0, i10205 = 0, i10206 = 0, i10207 = 0, i10208 = 0, i10209 = 0;
+        int i10210 = 0, i10211 = 0, i10212 = 0, i10213 = 0, i10214 = 0, i10215 = 0, i10216 = 0, i10217 = 0, i10218 = 0, i10219 = 0;
+        int i10220 = 0, i10221 = 0, i10222 = 0, i10223 = 0, i10224 = 0, i10225 = 0, i10226 = 0, i10227 = 0, i10228 = 0, i10229 = 0;
+        int i10230 = 0, i10231 = 0, i10232 = 0, i10233 = 0, i10234 = 0, i10235 = 0, i10236 = 0, i10237 = 0, i10238 = 0, i10239 = 0;
+        int i10240 = 0, i10241 = 0, i10242 = 0, i10243 = 0, i10244 = 0, i10245 = 0, i10246 = 0, i10247 = 0, i10248 = 0, i10249 = 0;
+        int i10250 = 0, i10251 = 0, i10252 = 0, i10253 = 0, i10254 = 0, i10255 = 0, i10256 = 0, i10257 = 0, i10258 = 0, i10259 = 0;
+        int i10260 = 0, i10261 = 0, i10262 = 0, i10263 = 0, i10264 = 0, i10265 = 0, i10266 = 0, i10267 = 0, i10268 = 0, i10269 = 0;
+        int i10270 = 0, i10271 = 0, i10272 = 0, i10273 = 0, i10274 = 0, i10275 = 0, i10276 = 0, i10277 = 0, i10278 = 0, i10279 = 0;
+        int i10280 = 0, i10281 = 0, i10282 = 0, i10283 = 0, i10284 = 0, i10285 = 0, i10286 = 0, i10287 = 0, i10288 = 0, i10289 = 0;
+        int i10290 = 0, i10291 = 0, i10292 = 0, i10293 = 0, i10294 = 0, i10295 = 0, i10296 = 0, i10297 = 0, i10298 = 0, i10299 = 0;
+        int i10300 = 0, i10301 = 0, i10302 = 0, i10303 = 0, i10304 = 0, i10305 = 0, i10306 = 0, i10307 = 0, i10308 = 0, i10309 = 0;
+        int i10310 = 0, i10311 = 0, i10312 = 0, i10313 = 0, i10314 = 0, i10315 = 0, i10316 = 0, i10317 = 0, i10318 = 0, i10319 = 0;
+        int i10320 = 0, i10321 = 0, i10322 = 0, i10323 = 0, i10324 = 0, i10325 = 0, i10326 = 0, i10327 = 0, i10328 = 0, i10329 = 0;
+        int i10330 = 0, i10331 = 0, i10332 = 0, i10333 = 0, i10334 = 0, i10335 = 0, i10336 = 0, i10337 = 0, i10338 = 0, i10339 = 0;
+        int i10340 = 0, i10341 = 0, i10342 = 0, i10343 = 0, i10344 = 0, i10345 = 0, i10346 = 0, i10347 = 0, i10348 = 0, i10349 = 0;
+        int i10350 = 0, i10351 = 0, i10352 = 0, i10353 = 0, i10354 = 0, i10355 = 0, i10356 = 0, i10357 = 0, i10358 = 0, i10359 = 0;
+        int i10360 = 0, i10361 = 0, i10362 = 0, i10363 = 0, i10364 = 0, i10365 = 0, i10366 = 0, i10367 = 0, i10368 = 0, i10369 = 0;
+        int i10370 = 0, i10371 = 0, i10372 = 0, i10373 = 0, i10374 = 0, i10375 = 0, i10376 = 0, i10377 = 0, i10378 = 0, i10379 = 0;
+        int i10380 = 0, i10381 = 0, i10382 = 0, i10383 = 0, i10384 = 0, i10385 = 0, i10386 = 0, i10387 = 0, i10388 = 0, i10389 = 0;
+        int i10390 = 0, i10391 = 0, i10392 = 0, i10393 = 0, i10394 = 0, i10395 = 0, i10396 = 0, i10397 = 0, i10398 = 0, i10399 = 0;
+        int i10400 = 0, i10401 = 0, i10402 = 0, i10403 = 0, i10404 = 0, i10405 = 0, i10406 = 0, i10407 = 0, i10408 = 0, i10409 = 0;
+        int i10410 = 0, i10411 = 0, i10412 = 0, i10413 = 0, i10414 = 0, i10415 = 0, i10416 = 0, i10417 = 0, i10418 = 0, i10419 = 0;
+        int i10420 = 0, i10421 = 0, i10422 = 0, i10423 = 0, i10424 = 0, i10425 = 0, i10426 = 0, i10427 = 0, i10428 = 0, i10429 = 0;
+        int i10430 = 0, i10431 = 0, i10432 = 0, i10433 = 0, i10434 = 0, i10435 = 0, i10436 = 0, i10437 = 0, i10438 = 0, i10439 = 0;
+        int i10440 = 0, i10441 = 0, i10442 = 0, i10443 = 0, i10444 = 0, i10445 = 0, i10446 = 0, i10447 = 0, i10448 = 0, i10449 = 0;
+        int i10450 = 0, i10451 = 0, i10452 = 0, i10453 = 0, i10454 = 0, i10455 = 0, i10456 = 0, i10457 = 0, i10458 = 0, i10459 = 0;
+        int i10460 = 0, i10461 = 0, i10462 = 0, i10463 = 0, i10464 = 0, i10465 = 0, i10466 = 0, i10467 = 0, i10468 = 0, i10469 = 0;
+        int i10470 = 0, i10471 = 0, i10472 = 0, i10473 = 0, i10474 = 0, i10475 = 0, i10476 = 0, i10477 = 0, i10478 = 0, i10479 = 0;
+        int i10480 = 0, i10481 = 0, i10482 = 0, i10483 = 0, i10484 = 0, i10485 = 0, i10486 = 0, i10487 = 0, i10488 = 0, i10489 = 0;
+        int i10490 = 0, i10491 = 0, i10492 = 0, i10493 = 0, i10494 = 0, i10495 = 0, i10496 = 0, i10497 = 0, i10498 = 0, i10499 = 0;
+        int i10500 = 0, i10501 = 0, i10502 = 0, i10503 = 0, i10504 = 0, i10505 = 0, i10506 = 0, i10507 = 0, i10508 = 0, i10509 = 0;
+        int i10510 = 0, i10511 = 0, i10512 = 0, i10513 = 0, i10514 = 0, i10515 = 0, i10516 = 0, i10517 = 0, i10518 = 0, i10519 = 0;
+        int i10520 = 0, i10521 = 0, i10522 = 0, i10523 = 0, i10524 = 0, i10525 = 0, i10526 = 0, i10527 = 0, i10528 = 0, i10529 = 0;
+        int i10530 = 0, i10531 = 0, i10532 = 0, i10533 = 0, i10534 = 0, i10535 = 0, i10536 = 0, i10537 = 0, i10538 = 0, i10539 = 0;
+        int i10540 = 0, i10541 = 0, i10542 = 0, i10543 = 0, i10544 = 0, i10545 = 0, i10546 = 0, i10547 = 0, i10548 = 0, i10549 = 0;
+        int i10550 = 0, i10551 = 0, i10552 = 0, i10553 = 0, i10554 = 0, i10555 = 0, i10556 = 0, i10557 = 0, i10558 = 0, i10559 = 0;
+        int i10560 = 0, i10561 = 0, i10562 = 0, i10563 = 0, i10564 = 0, i10565 = 0, i10566 = 0, i10567 = 0, i10568 = 0, i10569 = 0;
+        int i10570 = 0, i10571 = 0, i10572 = 0, i10573 = 0, i10574 = 0, i10575 = 0, i10576 = 0, i10577 = 0, i10578 = 0, i10579 = 0;
+        int i10580 = 0, i10581 = 0, i10582 = 0, i10583 = 0, i10584 = 0, i10585 = 0, i10586 = 0, i10587 = 0, i10588 = 0, i10589 = 0;
+        int i10590 = 0, i10591 = 0, i10592 = 0, i10593 = 0, i10594 = 0, i10595 = 0, i10596 = 0, i10597 = 0, i10598 = 0, i10599 = 0;
+        int i10600 = 0, i10601 = 0, i10602 = 0, i10603 = 0, i10604 = 0, i10605 = 0, i10606 = 0, i10607 = 0, i10608 = 0, i10609 = 0;
+        int i10610 = 0, i10611 = 0, i10612 = 0, i10613 = 0, i10614 = 0, i10615 = 0, i10616 = 0, i10617 = 0, i10618 = 0, i10619 = 0;
+        int i10620 = 0, i10621 = 0, i10622 = 0, i10623 = 0, i10624 = 0, i10625 = 0, i10626 = 0, i10627 = 0, i10628 = 0, i10629 = 0;
+        int i10630 = 0, i10631 = 0, i10632 = 0, i10633 = 0, i10634 = 0, i10635 = 0, i10636 = 0, i10637 = 0, i10638 = 0, i10639 = 0;
+        int i10640 = 0, i10641 = 0, i10642 = 0, i10643 = 0, i10644 = 0, i10645 = 0, i10646 = 0, i10647 = 0, i10648 = 0, i10649 = 0;
+        int i10650 = 0, i10651 = 0, i10652 = 0, i10653 = 0, i10654 = 0, i10655 = 0, i10656 = 0, i10657 = 0, i10658 = 0, i10659 = 0;
+        int i10660 = 0, i10661 = 0, i10662 = 0, i10663 = 0, i10664 = 0, i10665 = 0, i10666 = 0, i10667 = 0, i10668 = 0, i10669 = 0;
+        int i10670 = 0, i10671 = 0, i10672 = 0, i10673 = 0, i10674 = 0, i10675 = 0, i10676 = 0, i10677 = 0, i10678 = 0, i10679 = 0;
+        int i10680 = 0, i10681 = 0, i10682 = 0, i10683 = 0, i10684 = 0, i10685 = 0, i10686 = 0, i10687 = 0, i10688 = 0, i10689 = 0;
+        int i10690 = 0, i10691 = 0, i10692 = 0, i10693 = 0, i10694 = 0, i10695 = 0, i10696 = 0, i10697 = 0, i10698 = 0, i10699 = 0;
+        int i10700 = 0, i10701 = 0, i10702 = 0, i10703 = 0, i10704 = 0, i10705 = 0, i10706 = 0, i10707 = 0, i10708 = 0, i10709 = 0;
+        int i10710 = 0, i10711 = 0, i10712 = 0, i10713 = 0, i10714 = 0, i10715 = 0, i10716 = 0, i10717 = 0, i10718 = 0, i10719 = 0;
+        int i10720 = 0, i10721 = 0, i10722 = 0, i10723 = 0, i10724 = 0, i10725 = 0, i10726 = 0, i10727 = 0, i10728 = 0, i10729 = 0;
+        int i10730 = 0, i10731 = 0, i10732 = 0, i10733 = 0, i10734 = 0, i10735 = 0, i10736 = 0, i10737 = 0, i10738 = 0, i10739 = 0;
+        int i10740 = 0, i10741 = 0, i10742 = 0, i10743 = 0, i10744 = 0, i10745 = 0, i10746 = 0, i10747 = 0, i10748 = 0, i10749 = 0;
+        int i10750 = 0, i10751 = 0, i10752 = 0, i10753 = 0, i10754 = 0, i10755 = 0, i10756 = 0, i10757 = 0, i10758 = 0, i10759 = 0;
+        int i10760 = 0, i10761 = 0, i10762 = 0, i10763 = 0, i10764 = 0, i10765 = 0, i10766 = 0, i10767 = 0, i10768 = 0, i10769 = 0;
+        int i10770 = 0, i10771 = 0, i10772 = 0, i10773 = 0, i10774 = 0, i10775 = 0, i10776 = 0, i10777 = 0, i10778 = 0, i10779 = 0;
+        int i10780 = 0, i10781 = 0, i10782 = 0, i10783 = 0, i10784 = 0, i10785 = 0, i10786 = 0, i10787 = 0, i10788 = 0, i10789 = 0;
+        int i10790 = 0, i10791 = 0, i10792 = 0, i10793 = 0, i10794 = 0, i10795 = 0, i10796 = 0, i10797 = 0, i10798 = 0, i10799 = 0;
+        int i10800 = 0, i10801 = 0, i10802 = 0, i10803 = 0, i10804 = 0, i10805 = 0, i10806 = 0, i10807 = 0, i10808 = 0, i10809 = 0;
+        int i10810 = 0, i10811 = 0, i10812 = 0, i10813 = 0, i10814 = 0, i10815 = 0, i10816 = 0, i10817 = 0, i10818 = 0, i10819 = 0;
+        int i10820 = 0, i10821 = 0, i10822 = 0, i10823 = 0, i10824 = 0, i10825 = 0, i10826 = 0, i10827 = 0, i10828 = 0, i10829 = 0;
+        int i10830 = 0, i10831 = 0, i10832 = 0, i10833 = 0, i10834 = 0, i10835 = 0, i10836 = 0, i10837 = 0, i10838 = 0, i10839 = 0;
+        int i10840 = 0, i10841 = 0, i10842 = 0, i10843 = 0, i10844 = 0, i10845 = 0, i10846 = 0, i10847 = 0, i10848 = 0, i10849 = 0;
+        int i10850 = 0, i10851 = 0, i10852 = 0, i10853 = 0, i10854 = 0, i10855 = 0, i10856 = 0, i10857 = 0, i10858 = 0, i10859 = 0;
+        int i10860 = 0, i10861 = 0, i10862 = 0, i10863 = 0, i10864 = 0, i10865 = 0, i10866 = 0, i10867 = 0, i10868 = 0, i10869 = 0;
+        int i10870 = 0, i10871 = 0, i10872 = 0, i10873 = 0, i10874 = 0, i10875 = 0, i10876 = 0, i10877 = 0, i10878 = 0, i10879 = 0;
+        int i10880 = 0, i10881 = 0, i10882 = 0, i10883 = 0, i10884 = 0, i10885 = 0, i10886 = 0, i10887 = 0, i10888 = 0, i10889 = 0;
+        int i10890 = 0, i10891 = 0, i10892 = 0, i10893 = 0, i10894 = 0, i10895 = 0, i10896 = 0, i10897 = 0, i10898 = 0, i10899 = 0;
+        int i10900 = 0, i10901 = 0, i10902 = 0, i10903 = 0, i10904 = 0, i10905 = 0, i10906 = 0, i10907 = 0, i10908 = 0, i10909 = 0;
+        int i10910 = 0, i10911 = 0, i10912 = 0, i10913 = 0, i10914 = 0, i10915 = 0, i10916 = 0, i10917 = 0, i10918 = 0, i10919 = 0;
+        int i10920 = 0, i10921 = 0, i10922 = 0, i10923 = 0, i10924 = 0, i10925 = 0, i10926 = 0, i10927 = 0, i10928 = 0, i10929 = 0;
+        int i10930 = 0, i10931 = 0, i10932 = 0, i10933 = 0, i10934 = 0, i10935 = 0, i10936 = 0, i10937 = 0, i10938 = 0, i10939 = 0;
+        int i10940 = 0, i10941 = 0, i10942 = 0, i10943 = 0, i10944 = 0, i10945 = 0, i10946 = 0, i10947 = 0, i10948 = 0, i10949 = 0;
+        int i10950 = 0, i10951 = 0, i10952 = 0, i10953 = 0, i10954 = 0, i10955 = 0, i10956 = 0, i10957 = 0, i10958 = 0, i10959 = 0;
+        int i10960 = 0, i10961 = 0, i10962 = 0, i10963 = 0, i10964 = 0, i10965 = 0, i10966 = 0, i10967 = 0, i10968 = 0, i10969 = 0;
+        int i10970 = 0, i10971 = 0, i10972 = 0, i10973 = 0, i10974 = 0, i10975 = 0, i10976 = 0, i10977 = 0, i10978 = 0, i10979 = 0;
+        int i10980 = 0, i10981 = 0, i10982 = 0, i10983 = 0, i10984 = 0, i10985 = 0, i10986 = 0, i10987 = 0, i10988 = 0, i10989 = 0;
+        int i10990 = 0, i10991 = 0, i10992 = 0, i10993 = 0, i10994 = 0, i10995 = 0, i10996 = 0, i10997 = 0, i10998 = 0, i10999 = 0;
+        int i11000 = 0, i11001 = 0, i11002 = 0, i11003 = 0, i11004 = 0, i11005 = 0, i11006 = 0, i11007 = 0, i11008 = 0, i11009 = 0;
+        int i11010 = 0, i11011 = 0, i11012 = 0, i11013 = 0, i11014 = 0, i11015 = 0, i11016 = 0, i11017 = 0, i11018 = 0, i11019 = 0;
+        int i11020 = 0, i11021 = 0, i11022 = 0, i11023 = 0, i11024 = 0, i11025 = 0, i11026 = 0, i11027 = 0, i11028 = 0, i11029 = 0;
+        int i11030 = 0, i11031 = 0, i11032 = 0, i11033 = 0, i11034 = 0, i11035 = 0, i11036 = 0, i11037 = 0, i11038 = 0, i11039 = 0;
+        int i11040 = 0, i11041 = 0, i11042 = 0, i11043 = 0, i11044 = 0, i11045 = 0, i11046 = 0, i11047 = 0, i11048 = 0, i11049 = 0;
+        int i11050 = 0, i11051 = 0, i11052 = 0, i11053 = 0, i11054 = 0, i11055 = 0, i11056 = 0, i11057 = 0, i11058 = 0, i11059 = 0;
+        int i11060 = 0, i11061 = 0, i11062 = 0, i11063 = 0, i11064 = 0, i11065 = 0, i11066 = 0, i11067 = 0, i11068 = 0, i11069 = 0;
+        int i11070 = 0, i11071 = 0, i11072 = 0, i11073 = 0, i11074 = 0, i11075 = 0, i11076 = 0, i11077 = 0, i11078 = 0, i11079 = 0;
+        int i11080 = 0, i11081 = 0, i11082 = 0, i11083 = 0, i11084 = 0, i11085 = 0, i11086 = 0, i11087 = 0, i11088 = 0, i11089 = 0;
+        int i11090 = 0, i11091 = 0, i11092 = 0, i11093 = 0, i11094 = 0, i11095 = 0, i11096 = 0, i11097 = 0, i11098 = 0, i11099 = 0;
+        int i11100 = 0, i11101 = 0, i11102 = 0, i11103 = 0, i11104 = 0, i11105 = 0, i11106 = 0, i11107 = 0, i11108 = 0, i11109 = 0;
+        int i11110 = 0, i11111 = 0, i11112 = 0, i11113 = 0, i11114 = 0, i11115 = 0, i11116 = 0, i11117 = 0, i11118 = 0, i11119 = 0;
+        int i11120 = 0, i11121 = 0, i11122 = 0, i11123 = 0, i11124 = 0, i11125 = 0, i11126 = 0, i11127 = 0, i11128 = 0, i11129 = 0;
+        int i11130 = 0, i11131 = 0, i11132 = 0, i11133 = 0, i11134 = 0, i11135 = 0, i11136 = 0, i11137 = 0, i11138 = 0, i11139 = 0;
+        int i11140 = 0, i11141 = 0, i11142 = 0, i11143 = 0, i11144 = 0, i11145 = 0, i11146 = 0, i11147 = 0, i11148 = 0, i11149 = 0;
+        int i11150 = 0, i11151 = 0, i11152 = 0, i11153 = 0, i11154 = 0, i11155 = 0, i11156 = 0, i11157 = 0, i11158 = 0, i11159 = 0;
+        int i11160 = 0, i11161 = 0, i11162 = 0, i11163 = 0, i11164 = 0, i11165 = 0, i11166 = 0, i11167 = 0, i11168 = 0, i11169 = 0;
+        int i11170 = 0, i11171 = 0, i11172 = 0, i11173 = 0, i11174 = 0, i11175 = 0, i11176 = 0, i11177 = 0, i11178 = 0, i11179 = 0;
+        int i11180 = 0, i11181 = 0, i11182 = 0, i11183 = 0, i11184 = 0, i11185 = 0, i11186 = 0, i11187 = 0, i11188 = 0, i11189 = 0;
+        int i11190 = 0, i11191 = 0, i11192 = 0, i11193 = 0, i11194 = 0, i11195 = 0, i11196 = 0, i11197 = 0, i11198 = 0, i11199 = 0;
+        int i11200 = 0, i11201 = 0, i11202 = 0, i11203 = 0, i11204 = 0, i11205 = 0, i11206 = 0, i11207 = 0, i11208 = 0, i11209 = 0;
+        int i11210 = 0, i11211 = 0, i11212 = 0, i11213 = 0, i11214 = 0, i11215 = 0, i11216 = 0, i11217 = 0, i11218 = 0, i11219 = 0;
+        int i11220 = 0, i11221 = 0, i11222 = 0, i11223 = 0, i11224 = 0, i11225 = 0, i11226 = 0, i11227 = 0, i11228 = 0, i11229 = 0;
+        int i11230 = 0, i11231 = 0, i11232 = 0, i11233 = 0, i11234 = 0, i11235 = 0, i11236 = 0, i11237 = 0, i11238 = 0, i11239 = 0;
+        int i11240 = 0, i11241 = 0, i11242 = 0, i11243 = 0, i11244 = 0, i11245 = 0, i11246 = 0, i11247 = 0, i11248 = 0, i11249 = 0;
+        int i11250 = 0, i11251 = 0, i11252 = 0, i11253 = 0, i11254 = 0, i11255 = 0, i11256 = 0, i11257 = 0, i11258 = 0, i11259 = 0;
+        int i11260 = 0, i11261 = 0, i11262 = 0, i11263 = 0, i11264 = 0, i11265 = 0, i11266 = 0, i11267 = 0, i11268 = 0, i11269 = 0;
+        int i11270 = 0, i11271 = 0, i11272 = 0, i11273 = 0, i11274 = 0, i11275 = 0, i11276 = 0, i11277 = 0, i11278 = 0, i11279 = 0;
+        int i11280 = 0, i11281 = 0, i11282 = 0, i11283 = 0, i11284 = 0, i11285 = 0, i11286 = 0, i11287 = 0, i11288 = 0, i11289 = 0;
+        int i11290 = 0, i11291 = 0, i11292 = 0, i11293 = 0, i11294 = 0, i11295 = 0, i11296 = 0, i11297 = 0, i11298 = 0, i11299 = 0;
+        int i11300 = 0, i11301 = 0, i11302 = 0, i11303 = 0, i11304 = 0, i11305 = 0, i11306 = 0, i11307 = 0, i11308 = 0, i11309 = 0;
+        int i11310 = 0, i11311 = 0, i11312 = 0, i11313 = 0, i11314 = 0, i11315 = 0, i11316 = 0, i11317 = 0, i11318 = 0, i11319 = 0;
+        int i11320 = 0, i11321 = 0, i11322 = 0, i11323 = 0, i11324 = 0, i11325 = 0, i11326 = 0, i11327 = 0, i11328 = 0, i11329 = 0;
+        int i11330 = 0, i11331 = 0, i11332 = 0, i11333 = 0, i11334 = 0, i11335 = 0, i11336 = 0, i11337 = 0, i11338 = 0, i11339 = 0;
+        int i11340 = 0, i11341 = 0, i11342 = 0, i11343 = 0, i11344 = 0, i11345 = 0, i11346 = 0, i11347 = 0, i11348 = 0, i11349 = 0;
+        int i11350 = 0, i11351 = 0, i11352 = 0, i11353 = 0, i11354 = 0, i11355 = 0, i11356 = 0, i11357 = 0, i11358 = 0, i11359 = 0;
+        int i11360 = 0, i11361 = 0, i11362 = 0, i11363 = 0, i11364 = 0, i11365 = 0, i11366 = 0, i11367 = 0, i11368 = 0, i11369 = 0;
+        int i11370 = 0, i11371 = 0, i11372 = 0, i11373 = 0, i11374 = 0, i11375 = 0, i11376 = 0, i11377 = 0, i11378 = 0, i11379 = 0;
+        int i11380 = 0, i11381 = 0, i11382 = 0, i11383 = 0, i11384 = 0, i11385 = 0, i11386 = 0, i11387 = 0, i11388 = 0, i11389 = 0;
+        int i11390 = 0, i11391 = 0, i11392 = 0, i11393 = 0, i11394 = 0, i11395 = 0, i11396 = 0, i11397 = 0, i11398 = 0, i11399 = 0;
+        int i11400 = 0, i11401 = 0, i11402 = 0, i11403 = 0, i11404 = 0, i11405 = 0, i11406 = 0, i11407 = 0, i11408 = 0, i11409 = 0;
+        int i11410 = 0, i11411 = 0, i11412 = 0, i11413 = 0, i11414 = 0, i11415 = 0, i11416 = 0, i11417 = 0, i11418 = 0, i11419 = 0;
+        int i11420 = 0, i11421 = 0, i11422 = 0, i11423 = 0, i11424 = 0, i11425 = 0, i11426 = 0, i11427 = 0, i11428 = 0, i11429 = 0;
+        int i11430 = 0, i11431 = 0, i11432 = 0, i11433 = 0, i11434 = 0, i11435 = 0, i11436 = 0, i11437 = 0, i11438 = 0, i11439 = 0;
+        int i11440 = 0, i11441 = 0, i11442 = 0, i11443 = 0, i11444 = 0, i11445 = 0, i11446 = 0, i11447 = 0, i11448 = 0, i11449 = 0;
+        int i11450 = 0, i11451 = 0, i11452 = 0, i11453 = 0, i11454 = 0, i11455 = 0, i11456 = 0, i11457 = 0, i11458 = 0, i11459 = 0;
+        int i11460 = 0, i11461 = 0, i11462 = 0, i11463 = 0, i11464 = 0, i11465 = 0, i11466 = 0, i11467 = 0, i11468 = 0, i11469 = 0;
+        int i11470 = 0, i11471 = 0, i11472 = 0, i11473 = 0, i11474 = 0, i11475 = 0, i11476 = 0, i11477 = 0, i11478 = 0, i11479 = 0;
+        int i11480 = 0, i11481 = 0, i11482 = 0, i11483 = 0, i11484 = 0, i11485 = 0, i11486 = 0, i11487 = 0, i11488 = 0, i11489 = 0;
+        int i11490 = 0, i11491 = 0, i11492 = 0, i11493 = 0, i11494 = 0, i11495 = 0, i11496 = 0, i11497 = 0, i11498 = 0, i11499 = 0;
+        int i11500 = 0, i11501 = 0, i11502 = 0, i11503 = 0, i11504 = 0, i11505 = 0, i11506 = 0, i11507 = 0, i11508 = 0, i11509 = 0;
+        int i11510 = 0, i11511 = 0, i11512 = 0, i11513 = 0, i11514 = 0, i11515 = 0, i11516 = 0, i11517 = 0, i11518 = 0, i11519 = 0;
+        int i11520 = 0, i11521 = 0, i11522 = 0, i11523 = 0, i11524 = 0, i11525 = 0, i11526 = 0, i11527 = 0, i11528 = 0, i11529 = 0;
+        int i11530 = 0, i11531 = 0, i11532 = 0, i11533 = 0, i11534 = 0, i11535 = 0, i11536 = 0, i11537 = 0, i11538 = 0, i11539 = 0;
+        int i11540 = 0, i11541 = 0, i11542 = 0, i11543 = 0, i11544 = 0, i11545 = 0, i11546 = 0, i11547 = 0, i11548 = 0, i11549 = 0;
+        int i11550 = 0, i11551 = 0, i11552 = 0, i11553 = 0, i11554 = 0, i11555 = 0, i11556 = 0, i11557 = 0, i11558 = 0, i11559 = 0;
+        int i11560 = 0, i11561 = 0, i11562 = 0, i11563 = 0, i11564 = 0, i11565 = 0, i11566 = 0, i11567 = 0, i11568 = 0, i11569 = 0;
+        int i11570 = 0, i11571 = 0, i11572 = 0, i11573 = 0, i11574 = 0, i11575 = 0, i11576 = 0, i11577 = 0, i11578 = 0, i11579 = 0;
+        int i11580 = 0, i11581 = 0, i11582 = 0, i11583 = 0, i11584 = 0, i11585 = 0, i11586 = 0, i11587 = 0, i11588 = 0, i11589 = 0;
+        int i11590 = 0, i11591 = 0, i11592 = 0, i11593 = 0, i11594 = 0, i11595 = 0, i11596 = 0, i11597 = 0, i11598 = 0, i11599 = 0;
+        int i11600 = 0, i11601 = 0, i11602 = 0, i11603 = 0, i11604 = 0, i11605 = 0, i11606 = 0, i11607 = 0, i11608 = 0, i11609 = 0;
+        int i11610 = 0, i11611 = 0, i11612 = 0, i11613 = 0, i11614 = 0, i11615 = 0, i11616 = 0, i11617 = 0, i11618 = 0, i11619 = 0;
+        int i11620 = 0, i11621 = 0, i11622 = 0, i11623 = 0, i11624 = 0, i11625 = 0, i11626 = 0, i11627 = 0, i11628 = 0, i11629 = 0;
+        int i11630 = 0, i11631 = 0, i11632 = 0, i11633 = 0, i11634 = 0, i11635 = 0, i11636 = 0, i11637 = 0, i11638 = 0, i11639 = 0;
+        int i11640 = 0, i11641 = 0, i11642 = 0, i11643 = 0, i11644 = 0, i11645 = 0, i11646 = 0, i11647 = 0, i11648 = 0, i11649 = 0;
+        int i11650 = 0, i11651 = 0, i11652 = 0, i11653 = 0, i11654 = 0, i11655 = 0, i11656 = 0, i11657 = 0, i11658 = 0, i11659 = 0;
+        int i11660 = 0, i11661 = 0, i11662 = 0, i11663 = 0, i11664 = 0, i11665 = 0, i11666 = 0, i11667 = 0, i11668 = 0, i11669 = 0;
+        int i11670 = 0, i11671 = 0, i11672 = 0, i11673 = 0, i11674 = 0, i11675 = 0, i11676 = 0, i11677 = 0, i11678 = 0, i11679 = 0;
+        int i11680 = 0, i11681 = 0, i11682 = 0, i11683 = 0, i11684 = 0, i11685 = 0, i11686 = 0, i11687 = 0, i11688 = 0, i11689 = 0;
+        int i11690 = 0, i11691 = 0, i11692 = 0, i11693 = 0, i11694 = 0, i11695 = 0, i11696 = 0, i11697 = 0, i11698 = 0, i11699 = 0;
+        int i11700 = 0, i11701 = 0, i11702 = 0, i11703 = 0, i11704 = 0, i11705 = 0, i11706 = 0, i11707 = 0, i11708 = 0, i11709 = 0;
+        int i11710 = 0, i11711 = 0, i11712 = 0, i11713 = 0, i11714 = 0, i11715 = 0, i11716 = 0, i11717 = 0, i11718 = 0, i11719 = 0;
+        int i11720 = 0, i11721 = 0, i11722 = 0, i11723 = 0, i11724 = 0, i11725 = 0, i11726 = 0, i11727 = 0, i11728 = 0, i11729 = 0;
+        int i11730 = 0, i11731 = 0, i11732 = 0, i11733 = 0, i11734 = 0, i11735 = 0, i11736 = 0, i11737 = 0, i11738 = 0, i11739 = 0;
+        int i11740 = 0, i11741 = 0, i11742 = 0, i11743 = 0, i11744 = 0, i11745 = 0, i11746 = 0, i11747 = 0, i11748 = 0, i11749 = 0;
+        int i11750 = 0, i11751 = 0, i11752 = 0, i11753 = 0, i11754 = 0, i11755 = 0, i11756 = 0, i11757 = 0, i11758 = 0, i11759 = 0;
+        int i11760 = 0, i11761 = 0, i11762 = 0, i11763 = 0, i11764 = 0, i11765 = 0, i11766 = 0, i11767 = 0, i11768 = 0, i11769 = 0;
+        int i11770 = 0, i11771 = 0, i11772 = 0, i11773 = 0, i11774 = 0, i11775 = 0, i11776 = 0, i11777 = 0, i11778 = 0, i11779 = 0;
+        int i11780 = 0, i11781 = 0, i11782 = 0, i11783 = 0, i11784 = 0, i11785 = 0, i11786 = 0, i11787 = 0, i11788 = 0, i11789 = 0;
+        int i11790 = 0, i11791 = 0, i11792 = 0, i11793 = 0, i11794 = 0, i11795 = 0, i11796 = 0, i11797 = 0, i11798 = 0, i11799 = 0;
+        int i11800 = 0, i11801 = 0, i11802 = 0, i11803 = 0, i11804 = 0, i11805 = 0, i11806 = 0, i11807 = 0, i11808 = 0, i11809 = 0;
+        int i11810 = 0, i11811 = 0, i11812 = 0, i11813 = 0, i11814 = 0, i11815 = 0, i11816 = 0, i11817 = 0, i11818 = 0, i11819 = 0;
+        int i11820 = 0, i11821 = 0, i11822 = 0, i11823 = 0, i11824 = 0, i11825 = 0, i11826 = 0, i11827 = 0, i11828 = 0, i11829 = 0;
+        int i11830 = 0, i11831 = 0, i11832 = 0, i11833 = 0, i11834 = 0, i11835 = 0, i11836 = 0, i11837 = 0, i11838 = 0, i11839 = 0;
+        int i11840 = 0, i11841 = 0, i11842 = 0, i11843 = 0, i11844 = 0, i11845 = 0, i11846 = 0, i11847 = 0, i11848 = 0, i11849 = 0;
+        int i11850 = 0, i11851 = 0, i11852 = 0, i11853 = 0, i11854 = 0, i11855 = 0, i11856 = 0, i11857 = 0, i11858 = 0, i11859 = 0;
+        int i11860 = 0, i11861 = 0, i11862 = 0, i11863 = 0, i11864 = 0, i11865 = 0, i11866 = 0, i11867 = 0, i11868 = 0, i11869 = 0;
+        int i11870 = 0, i11871 = 0, i11872 = 0, i11873 = 0, i11874 = 0, i11875 = 0, i11876 = 0, i11877 = 0, i11878 = 0, i11879 = 0;
+        int i11880 = 0, i11881 = 0, i11882 = 0, i11883 = 0, i11884 = 0, i11885 = 0, i11886 = 0, i11887 = 0, i11888 = 0, i11889 = 0;
+        int i11890 = 0, i11891 = 0, i11892 = 0, i11893 = 0, i11894 = 0, i11895 = 0, i11896 = 0, i11897 = 0, i11898 = 0, i11899 = 0;
+        int i11900 = 0, i11901 = 0, i11902 = 0, i11903 = 0, i11904 = 0, i11905 = 0, i11906 = 0, i11907 = 0, i11908 = 0, i11909 = 0;
+        int i11910 = 0, i11911 = 0, i11912 = 0, i11913 = 0, i11914 = 0, i11915 = 0, i11916 = 0, i11917 = 0, i11918 = 0, i11919 = 0;
+        int i11920 = 0, i11921 = 0, i11922 = 0, i11923 = 0, i11924 = 0, i11925 = 0, i11926 = 0, i11927 = 0, i11928 = 0, i11929 = 0;
+        int i11930 = 0, i11931 = 0, i11932 = 0, i11933 = 0, i11934 = 0, i11935 = 0, i11936 = 0, i11937 = 0, i11938 = 0, i11939 = 0;
+        int i11940 = 0, i11941 = 0, i11942 = 0, i11943 = 0, i11944 = 0, i11945 = 0, i11946 = 0, i11947 = 0, i11948 = 0, i11949 = 0;
+        int i11950 = 0, i11951 = 0, i11952 = 0, i11953 = 0, i11954 = 0, i11955 = 0, i11956 = 0, i11957 = 0, i11958 = 0, i11959 = 0;
+        int i11960 = 0, i11961 = 0, i11962 = 0, i11963 = 0, i11964 = 0, i11965 = 0, i11966 = 0, i11967 = 0, i11968 = 0, i11969 = 0;
+        int i11970 = 0, i11971 = 0, i11972 = 0, i11973 = 0, i11974 = 0, i11975 = 0, i11976 = 0, i11977 = 0, i11978 = 0, i11979 = 0;
+        int i11980 = 0, i11981 = 0, i11982 = 0, i11983 = 0, i11984 = 0, i11985 = 0, i11986 = 0, i11987 = 0, i11988 = 0, i11989 = 0;
+        int i11990 = 0, i11991 = 0, i11992 = 0, i11993 = 0, i11994 = 0, i11995 = 0, i11996 = 0, i11997 = 0, i11998 = 0, i11999 = 0;
+        int i12000 = 0, i12001 = 0, i12002 = 0, i12003 = 0, i12004 = 0, i12005 = 0, i12006 = 0, i12007 = 0, i12008 = 0, i12009 = 0;
+        int i12010 = 0, i12011 = 0, i12012 = 0, i12013 = 0, i12014 = 0, i12015 = 0, i12016 = 0, i12017 = 0, i12018 = 0, i12019 = 0;
+        int i12020 = 0, i12021 = 0, i12022 = 0, i12023 = 0, i12024 = 0, i12025 = 0, i12026 = 0, i12027 = 0, i12028 = 0, i12029 = 0;
+        int i12030 = 0, i12031 = 0, i12032 = 0, i12033 = 0, i12034 = 0, i12035 = 0, i12036 = 0, i12037 = 0, i12038 = 0, i12039 = 0;
+        int i12040 = 0, i12041 = 0, i12042 = 0, i12043 = 0, i12044 = 0, i12045 = 0, i12046 = 0, i12047 = 0, i12048 = 0, i12049 = 0;
+        int i12050 = 0, i12051 = 0, i12052 = 0, i12053 = 0, i12054 = 0, i12055 = 0, i12056 = 0, i12057 = 0, i12058 = 0, i12059 = 0;
+        int i12060 = 0, i12061 = 0, i12062 = 0, i12063 = 0, i12064 = 0, i12065 = 0, i12066 = 0, i12067 = 0, i12068 = 0, i12069 = 0;
+        int i12070 = 0, i12071 = 0, i12072 = 0, i12073 = 0, i12074 = 0, i12075 = 0, i12076 = 0, i12077 = 0, i12078 = 0, i12079 = 0;
+        int i12080 = 0, i12081 = 0, i12082 = 0, i12083 = 0, i12084 = 0, i12085 = 0, i12086 = 0, i12087 = 0, i12088 = 0, i12089 = 0;
+        int i12090 = 0, i12091 = 0, i12092 = 0, i12093 = 0, i12094 = 0, i12095 = 0, i12096 = 0, i12097 = 0, i12098 = 0, i12099 = 0;
+        int i12100 = 0, i12101 = 0, i12102 = 0, i12103 = 0, i12104 = 0, i12105 = 0, i12106 = 0, i12107 = 0, i12108 = 0, i12109 = 0;
+        int i12110 = 0, i12111 = 0, i12112 = 0, i12113 = 0, i12114 = 0, i12115 = 0, i12116 = 0, i12117 = 0, i12118 = 0, i12119 = 0;
+        int i12120 = 0, i12121 = 0, i12122 = 0, i12123 = 0, i12124 = 0, i12125 = 0, i12126 = 0, i12127 = 0, i12128 = 0, i12129 = 0;
+        int i12130 = 0, i12131 = 0, i12132 = 0, i12133 = 0, i12134 = 0, i12135 = 0, i12136 = 0, i12137 = 0, i12138 = 0, i12139 = 0;
+        int i12140 = 0, i12141 = 0, i12142 = 0, i12143 = 0, i12144 = 0, i12145 = 0, i12146 = 0, i12147 = 0, i12148 = 0, i12149 = 0;
+        int i12150 = 0, i12151 = 0, i12152 = 0, i12153 = 0, i12154 = 0, i12155 = 0, i12156 = 0, i12157 = 0, i12158 = 0, i12159 = 0;
+        int i12160 = 0, i12161 = 0, i12162 = 0, i12163 = 0, i12164 = 0, i12165 = 0, i12166 = 0, i12167 = 0, i12168 = 0, i12169 = 0;
+        int i12170 = 0, i12171 = 0, i12172 = 0, i12173 = 0, i12174 = 0, i12175 = 0, i12176 = 0, i12177 = 0, i12178 = 0, i12179 = 0;
+        int i12180 = 0, i12181 = 0, i12182 = 0, i12183 = 0, i12184 = 0, i12185 = 0, i12186 = 0, i12187 = 0, i12188 = 0, i12189 = 0;
+        int i12190 = 0, i12191 = 0, i12192 = 0, i12193 = 0, i12194 = 0, i12195 = 0, i12196 = 0, i12197 = 0, i12198 = 0, i12199 = 0;
+        int i12200 = 0, i12201 = 0, i12202 = 0, i12203 = 0, i12204 = 0, i12205 = 0, i12206 = 0, i12207 = 0, i12208 = 0, i12209 = 0;
+        int i12210 = 0, i12211 = 0, i12212 = 0, i12213 = 0, i12214 = 0, i12215 = 0, i12216 = 0, i12217 = 0, i12218 = 0, i12219 = 0;
+        int i12220 = 0, i12221 = 0, i12222 = 0, i12223 = 0, i12224 = 0, i12225 = 0, i12226 = 0, i12227 = 0, i12228 = 0, i12229 = 0;
+        int i12230 = 0, i12231 = 0, i12232 = 0, i12233 = 0, i12234 = 0, i12235 = 0, i12236 = 0, i12237 = 0, i12238 = 0, i12239 = 0;
+        int i12240 = 0, i12241 = 0, i12242 = 0, i12243 = 0, i12244 = 0, i12245 = 0, i12246 = 0, i12247 = 0, i12248 = 0, i12249 = 0;
+        int i12250 = 0, i12251 = 0, i12252 = 0, i12253 = 0, i12254 = 0, i12255 = 0, i12256 = 0, i12257 = 0, i12258 = 0, i12259 = 0;
+        int i12260 = 0, i12261 = 0, i12262 = 0, i12263 = 0, i12264 = 0, i12265 = 0, i12266 = 0, i12267 = 0, i12268 = 0, i12269 = 0;
+        int i12270 = 0, i12271 = 0, i12272 = 0, i12273 = 0, i12274 = 0, i12275 = 0, i12276 = 0, i12277 = 0, i12278 = 0, i12279 = 0;
+        int i12280 = 0, i12281 = 0, i12282 = 0, i12283 = 0, i12284 = 0, i12285 = 0, i12286 = 0, i12287 = 0, i12288 = 0, i12289 = 0;
+        int i12290 = 0, i12291 = 0, i12292 = 0, i12293 = 0, i12294 = 0, i12295 = 0, i12296 = 0, i12297 = 0, i12298 = 0, i12299 = 0;
+        int i12300 = 0, i12301 = 0, i12302 = 0, i12303 = 0, i12304 = 0, i12305 = 0, i12306 = 0, i12307 = 0, i12308 = 0, i12309 = 0;
+        int i12310 = 0, i12311 = 0, i12312 = 0, i12313 = 0, i12314 = 0, i12315 = 0, i12316 = 0, i12317 = 0, i12318 = 0, i12319 = 0;
+        int i12320 = 0, i12321 = 0, i12322 = 0, i12323 = 0, i12324 = 0, i12325 = 0, i12326 = 0, i12327 = 0, i12328 = 0, i12329 = 0;
+        int i12330 = 0, i12331 = 0, i12332 = 0, i12333 = 0, i12334 = 0, i12335 = 0, i12336 = 0, i12337 = 0, i12338 = 0, i12339 = 0;
+        int i12340 = 0, i12341 = 0, i12342 = 0, i12343 = 0, i12344 = 0, i12345 = 0, i12346 = 0, i12347 = 0, i12348 = 0, i12349 = 0;
+        int i12350 = 0, i12351 = 0, i12352 = 0, i12353 = 0, i12354 = 0, i12355 = 0, i12356 = 0, i12357 = 0, i12358 = 0, i12359 = 0;
+        int i12360 = 0, i12361 = 0, i12362 = 0, i12363 = 0, i12364 = 0, i12365 = 0, i12366 = 0, i12367 = 0, i12368 = 0, i12369 = 0;
+        int i12370 = 0, i12371 = 0, i12372 = 0, i12373 = 0, i12374 = 0, i12375 = 0, i12376 = 0, i12377 = 0, i12378 = 0, i12379 = 0;
+        int i12380 = 0, i12381 = 0, i12382 = 0, i12383 = 0, i12384 = 0, i12385 = 0, i12386 = 0, i12387 = 0, i12388 = 0, i12389 = 0;
+        int i12390 = 0, i12391 = 0, i12392 = 0, i12393 = 0, i12394 = 0, i12395 = 0, i12396 = 0, i12397 = 0, i12398 = 0, i12399 = 0;
+        int i12400 = 0, i12401 = 0, i12402 = 0, i12403 = 0, i12404 = 0, i12405 = 0, i12406 = 0, i12407 = 0, i12408 = 0, i12409 = 0;
+        int i12410 = 0, i12411 = 0, i12412 = 0, i12413 = 0, i12414 = 0, i12415 = 0, i12416 = 0, i12417 = 0, i12418 = 0, i12419 = 0;
+        int i12420 = 0, i12421 = 0, i12422 = 0, i12423 = 0, i12424 = 0, i12425 = 0, i12426 = 0, i12427 = 0, i12428 = 0, i12429 = 0;
+        int i12430 = 0, i12431 = 0, i12432 = 0, i12433 = 0, i12434 = 0, i12435 = 0, i12436 = 0, i12437 = 0, i12438 = 0, i12439 = 0;
+        int i12440 = 0, i12441 = 0, i12442 = 0, i12443 = 0, i12444 = 0, i12445 = 0, i12446 = 0, i12447 = 0, i12448 = 0, i12449 = 0;
+        int i12450 = 0, i12451 = 0, i12452 = 0, i12453 = 0, i12454 = 0, i12455 = 0, i12456 = 0, i12457 = 0, i12458 = 0, i12459 = 0;
+        int i12460 = 0, i12461 = 0, i12462 = 0, i12463 = 0, i12464 = 0, i12465 = 0, i12466 = 0, i12467 = 0, i12468 = 0, i12469 = 0;
+        int i12470 = 0, i12471 = 0, i12472 = 0, i12473 = 0, i12474 = 0, i12475 = 0, i12476 = 0, i12477 = 0, i12478 = 0, i12479 = 0;
+        int i12480 = 0, i12481 = 0, i12482 = 0, i12483 = 0, i12484 = 0, i12485 = 0, i12486 = 0, i12487 = 0, i12488 = 0, i12489 = 0;
+        int i12490 = 0, i12491 = 0, i12492 = 0, i12493 = 0, i12494 = 0, i12495 = 0, i12496 = 0, i12497 = 0, i12498 = 0, i12499 = 0;
+        int i12500 = 0, i12501 = 0, i12502 = 0, i12503 = 0, i12504 = 0, i12505 = 0, i12506 = 0, i12507 = 0, i12508 = 0, i12509 = 0;
+        int i12510 = 0, i12511 = 0, i12512 = 0, i12513 = 0, i12514 = 0, i12515 = 0, i12516 = 0, i12517 = 0, i12518 = 0, i12519 = 0;
+        int i12520 = 0, i12521 = 0, i12522 = 0, i12523 = 0, i12524 = 0, i12525 = 0, i12526 = 0, i12527 = 0, i12528 = 0, i12529 = 0;
+        int i12530 = 0, i12531 = 0, i12532 = 0, i12533 = 0, i12534 = 0, i12535 = 0, i12536 = 0, i12537 = 0, i12538 = 0, i12539 = 0;
+        int i12540 = 0, i12541 = 0, i12542 = 0, i12543 = 0, i12544 = 0, i12545 = 0, i12546 = 0, i12547 = 0, i12548 = 0, i12549 = 0;
+        int i12550 = 0, i12551 = 0, i12552 = 0, i12553 = 0, i12554 = 0, i12555 = 0, i12556 = 0, i12557 = 0, i12558 = 0, i12559 = 0;
+        int i12560 = 0, i12561 = 0, i12562 = 0, i12563 = 0, i12564 = 0, i12565 = 0, i12566 = 0, i12567 = 0, i12568 = 0, i12569 = 0;
+        int i12570 = 0, i12571 = 0, i12572 = 0, i12573 = 0, i12574 = 0, i12575 = 0, i12576 = 0, i12577 = 0, i12578 = 0, i12579 = 0;
+        int i12580 = 0, i12581 = 0, i12582 = 0, i12583 = 0, i12584 = 0, i12585 = 0, i12586 = 0, i12587 = 0, i12588 = 0, i12589 = 0;
+        int i12590 = 0, i12591 = 0, i12592 = 0, i12593 = 0, i12594 = 0, i12595 = 0, i12596 = 0, i12597 = 0, i12598 = 0, i12599 = 0;
+        int i12600 = 0, i12601 = 0, i12602 = 0, i12603 = 0, i12604 = 0, i12605 = 0, i12606 = 0, i12607 = 0, i12608 = 0, i12609 = 0;
+        int i12610 = 0, i12611 = 0, i12612 = 0, i12613 = 0, i12614 = 0, i12615 = 0, i12616 = 0, i12617 = 0, i12618 = 0, i12619 = 0;
+        int i12620 = 0, i12621 = 0, i12622 = 0, i12623 = 0, i12624 = 0, i12625 = 0, i12626 = 0, i12627 = 0, i12628 = 0, i12629 = 0;
+        int i12630 = 0, i12631 = 0, i12632 = 0, i12633 = 0, i12634 = 0, i12635 = 0, i12636 = 0, i12637 = 0, i12638 = 0, i12639 = 0;
+        int i12640 = 0, i12641 = 0, i12642 = 0, i12643 = 0, i12644 = 0, i12645 = 0, i12646 = 0, i12647 = 0, i12648 = 0, i12649 = 0;
+        int i12650 = 0, i12651 = 0, i12652 = 0, i12653 = 0, i12654 = 0, i12655 = 0, i12656 = 0, i12657 = 0, i12658 = 0, i12659 = 0;
+        int i12660 = 0, i12661 = 0, i12662 = 0, i12663 = 0, i12664 = 0, i12665 = 0, i12666 = 0, i12667 = 0, i12668 = 0, i12669 = 0;
+        int i12670 = 0, i12671 = 0, i12672 = 0, i12673 = 0, i12674 = 0, i12675 = 0, i12676 = 0, i12677 = 0, i12678 = 0, i12679 = 0;
+        int i12680 = 0, i12681 = 0, i12682 = 0, i12683 = 0, i12684 = 0, i12685 = 0, i12686 = 0, i12687 = 0, i12688 = 0, i12689 = 0;
+        int i12690 = 0, i12691 = 0, i12692 = 0, i12693 = 0, i12694 = 0, i12695 = 0, i12696 = 0, i12697 = 0, i12698 = 0, i12699 = 0;
+        int i12700 = 0, i12701 = 0, i12702 = 0, i12703 = 0, i12704 = 0, i12705 = 0, i12706 = 0, i12707 = 0, i12708 = 0, i12709 = 0;
+        int i12710 = 0, i12711 = 0, i12712 = 0, i12713 = 0, i12714 = 0, i12715 = 0, i12716 = 0, i12717 = 0, i12718 = 0, i12719 = 0;
+        int i12720 = 0, i12721 = 0, i12722 = 0, i12723 = 0, i12724 = 0, i12725 = 0, i12726 = 0, i12727 = 0, i12728 = 0, i12729 = 0;
+        int i12730 = 0, i12731 = 0, i12732 = 0, i12733 = 0, i12734 = 0, i12735 = 0, i12736 = 0, i12737 = 0, i12738 = 0, i12739 = 0;
+        int i12740 = 0, i12741 = 0, i12742 = 0, i12743 = 0, i12744 = 0, i12745 = 0, i12746 = 0, i12747 = 0, i12748 = 0, i12749 = 0;
+        int i12750 = 0, i12751 = 0, i12752 = 0, i12753 = 0, i12754 = 0, i12755 = 0, i12756 = 0, i12757 = 0, i12758 = 0, i12759 = 0;
+        int i12760 = 0, i12761 = 0, i12762 = 0, i12763 = 0, i12764 = 0, i12765 = 0, i12766 = 0, i12767 = 0, i12768 = 0, i12769 = 0;
+        int i12770 = 0, i12771 = 0, i12772 = 0, i12773 = 0, i12774 = 0, i12775 = 0, i12776 = 0, i12777 = 0, i12778 = 0, i12779 = 0;
+        int i12780 = 0, i12781 = 0, i12782 = 0, i12783 = 0, i12784 = 0, i12785 = 0, i12786 = 0, i12787 = 0, i12788 = 0, i12789 = 0;
+        int i12790 = 0, i12791 = 0, i12792 = 0, i12793 = 0, i12794 = 0, i12795 = 0, i12796 = 0, i12797 = 0, i12798 = 0, i12799 = 0;
+        int i12800 = 0, i12801 = 0, i12802 = 0, i12803 = 0, i12804 = 0, i12805 = 0, i12806 = 0, i12807 = 0, i12808 = 0, i12809 = 0;
+        int i12810 = 0, i12811 = 0, i12812 = 0, i12813 = 0, i12814 = 0, i12815 = 0, i12816 = 0, i12817 = 0, i12818 = 0, i12819 = 0;
+        int i12820 = 0, i12821 = 0, i12822 = 0, i12823 = 0, i12824 = 0, i12825 = 0, i12826 = 0, i12827 = 0, i12828 = 0, i12829 = 0;
+        int i12830 = 0, i12831 = 0, i12832 = 0, i12833 = 0, i12834 = 0, i12835 = 0, i12836 = 0, i12837 = 0, i12838 = 0, i12839 = 0;
+        int i12840 = 0, i12841 = 0, i12842 = 0, i12843 = 0, i12844 = 0, i12845 = 0, i12846 = 0, i12847 = 0, i12848 = 0, i12849 = 0;
+        int i12850 = 0, i12851 = 0, i12852 = 0, i12853 = 0, i12854 = 0, i12855 = 0, i12856 = 0, i12857 = 0, i12858 = 0, i12859 = 0;
+        int i12860 = 0, i12861 = 0, i12862 = 0, i12863 = 0, i12864 = 0, i12865 = 0, i12866 = 0, i12867 = 0, i12868 = 0, i12869 = 0;
+        int i12870 = 0, i12871 = 0, i12872 = 0, i12873 = 0, i12874 = 0, i12875 = 0, i12876 = 0, i12877 = 0, i12878 = 0, i12879 = 0;
+        int i12880 = 0, i12881 = 0, i12882 = 0, i12883 = 0, i12884 = 0, i12885 = 0, i12886 = 0, i12887 = 0, i12888 = 0, i12889 = 0;
+        int i12890 = 0, i12891 = 0, i12892 = 0, i12893 = 0, i12894 = 0, i12895 = 0, i12896 = 0, i12897 = 0, i12898 = 0, i12899 = 0;
+        int i12900 = 0, i12901 = 0, i12902 = 0, i12903 = 0, i12904 = 0, i12905 = 0, i12906 = 0, i12907 = 0, i12908 = 0, i12909 = 0;
+        int i12910 = 0, i12911 = 0, i12912 = 0, i12913 = 0, i12914 = 0, i12915 = 0, i12916 = 0, i12917 = 0, i12918 = 0, i12919 = 0;
+        int i12920 = 0, i12921 = 0, i12922 = 0, i12923 = 0, i12924 = 0, i12925 = 0, i12926 = 0, i12927 = 0, i12928 = 0, i12929 = 0;
+        int i12930 = 0, i12931 = 0, i12932 = 0, i12933 = 0, i12934 = 0, i12935 = 0, i12936 = 0, i12937 = 0, i12938 = 0, i12939 = 0;
+        int i12940 = 0, i12941 = 0, i12942 = 0, i12943 = 0, i12944 = 0, i12945 = 0, i12946 = 0, i12947 = 0, i12948 = 0, i12949 = 0;
+        int i12950 = 0, i12951 = 0, i12952 = 0, i12953 = 0, i12954 = 0, i12955 = 0, i12956 = 0, i12957 = 0, i12958 = 0, i12959 = 0;
+        int i12960 = 0, i12961 = 0, i12962 = 0, i12963 = 0, i12964 = 0, i12965 = 0, i12966 = 0, i12967 = 0, i12968 = 0, i12969 = 0;
+        int i12970 = 0, i12971 = 0, i12972 = 0, i12973 = 0, i12974 = 0, i12975 = 0, i12976 = 0, i12977 = 0, i12978 = 0, i12979 = 0;
+        int i12980 = 0, i12981 = 0, i12982 = 0, i12983 = 0, i12984 = 0, i12985 = 0, i12986 = 0, i12987 = 0, i12988 = 0, i12989 = 0;
+        int i12990 = 0, i12991 = 0, i12992 = 0, i12993 = 0, i12994 = 0, i12995 = 0, i12996 = 0, i12997 = 0, i12998 = 0, i12999 = 0;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7116216/StackOverflow.java	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7116216
+ * @summary The VM crashes when a GC happens while a StackOverflowError is being thrown
+ *
+ * @run main/othervm -Xcomp -Xbatch StackOverflow
+ */
+
+public class StackOverflow { // recurses until the stack overflows, then calls a large-frame method while unwinding
+    static String stackOverflow_largeFrame_liveOopForGC; // static sink written in the inner catch below
+
+    public static int stackOverflow_largeFrame(int call_count, String liveOopForGC) { // no base case: only a StackOverflowError ends the recursion
+        try {
+            int return_count = stackOverflow_largeFrame(++call_count, liveOopForGC); // value decreases by one per frame on the way back up
+            if (return_count == 0) { // can hold in at most one frame, since each caller receives one less than its callee got
+                try {
+                    LargeFrame.method_with_many_locals(liveOopForGC, 2,3,4,5,6,7,liveOopForGC); // LargeFrame is not defined in this file
+                } catch (StackOverflowError e2) {
+                    // access liveOopForGC to make it a live variable
+                    stackOverflow_largeFrame_liveOopForGC = liveOopForGC;
+                }
+            }
+            return return_count - 1;
+        } catch (StackOverflowError e) {
+            // Return a value that is large enough such that no unrecoverable
+            // stack overflow will occur afterwards, but that is small enough
+            // such that calling LargeFrame.method_with_many_locals() will
+            // cause a StackOverflowError.
+            // Don't use a call here because we're out of stack space anyway!
+            int tmp = call_count / 2;
+            return (tmp < 100 ? tmp : 100); // i.e. the smaller of call_count / 2 and 100, computed without a call
+        }
+    }
+    public static void main(String args[]) { // entry point run by jtreg; prints "finished ok!" if the VM survives
+        LargeFrame.method_with_many_locals(new Object(), 2,3,4,5,6,7,new Object()); // called once before the recursion; NOTE(review): presumably a warm-up with ample stack - confirm
+
+        stackOverflow_largeFrame(0, "this is a live oop to test GC"); // return value is ignored
+        System.out.println("finished ok!");
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7123108/Test7123108.java	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7123108
+ * @summary C1 crashes with assert(if_state != NULL) failed: states do not match up
+ *
+ * @run main/othervm -Xcomp Test7123108
+ */
+
+public class Test7123108 { // reproducer for bug 7123108; NOTE(review): test() looks machine-generated, presumably its exact shape matters - do not simplify without confirming
+
+    static class Test_Class_0 {
+        final static byte var_2 = 67; // compile-time constant used as one operand of the '&' in test()
+        byte var_3;
+    }
+
+    Object var_25 = "kgfpyhcms"; // holds a String, so the (Test_Class_0) cast in test() cannot succeed
+    static long var_27 = 6899666748616086528L;
+
+    static float func_1()
+    {
+        return 0.0F;
+    }
+
+    private void test() // evaluates one expression whose condition always hits the failing cast of var_25
+    {
+        "dlwq".charAt(((short)'x' > var_27 | func_1() <= (((Test_Class_0)var_25).var_3) ? true : true) ? Test_Class_0.var_2 & (short)-1.1173839E38F : 'Y'); // '|' does not short-circuit, so the cast is always evaluated
+    }
+
+    public static void main(String[] args)
+    {
+        Test7123108 t = new Test7123108();
+        try {
+            t.test();
+        } catch (Throwable e) { } // anything thrown by test() is discarded; NOTE(review): presumably the test only checks that the VM does not crash - confirm
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7125879/Test7125879.java	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7125879
+ * @summary assert(proj != NULL) failed: must be found
+ *
+ * @run main/othervm -Xcomp Test7125879
+ */
+
+public class Test7125879 { // reproducer for bug 7125879 (see the @summary tag above)
+    String var_1 = "abc";
+
+    public Test7125879() {
+        var_1 = var_1.replaceAll("d", "e") + var_1; // "abc" contains no "d", so var_1 becomes "abcabc"
+    }
+
+    public static void main(String[] args) {
+        Test7125879 t = new Test7125879();
+        try {
+            t.test();
+        } catch(Throwable e) { } // anything thrown by test() is discarded; NOTE(review): presumably only a VM crash should fail this test - confirm
+    }
+
+    private void test() { // always ends by throwing from the array creation below
+        new Test7125879().var_1 = ((Test7125879)(new Object[-1])[0]).var_1; // left-hand object is constructed first; new Object[-1] then throws NegativeArraySizeException
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7141637/SpreadNullArg.java	Mon Feb 27 15:06:36 2012 -0800
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2011 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test SpreadNullArg
+ * @bug 7141637
+ * @summary  verifies that the MethodHandle spread adapter can gracefully handle null arguments.
+ * @run main SpreadNullArg
+ * @author volker.simonis@gmail.com
+ */
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+
+public class SpreadNullArg { // invokes a spread invoker with a null argument array and expects a NullPointerException
+
+  public static void main(String args[]) {
+
+    MethodType mt_ref_arg = MethodType.methodType(int.class, Integer.class); // type (Integer)int, matching target_spread_arg
+    MethodHandle mh_spreadInvoker = MethodHandles.spreadInvoker(mt_ref_arg, 0); // 0 leading arguments: all target arguments come from the Object[]
+    MethodHandle mh_spread_target;
+    int result = 42; // sentinel; only overwritten if invokeExact returns normally
+
+    try {
+      mh_spread_target =
+        MethodHandles.lookup().findStatic(SpreadNullArg.class, "target_spread_arg", mt_ref_arg);
+      result = (int) mh_spreadInvoker.invokeExact(mh_spread_target, (Object[]) null); // null array passed where one element is needed
+    } catch(NullPointerException e) {
+      // Expected exception - do nothing!
+    } catch(Throwable e) {
+      throw new Error(e); // any other throwable (including lookup failures) fails the test, with the cause preserved
+    }
+
+    if (result != 42) throw new Error("Expected NullPointerException was not thrown"); // NOTE(review): a normal return of exactly 42 would go unnoticed
+  }
+
+  public static int target_spread_arg(Integer i1) { // target of the spread call; looked up by name in main
+    return i1.intValue();
+  }
+
+}